blob: 99b27e297d47ed553c38b18a09fd2940913fa540 [file] [log] [blame]
Marat Dukhanef47f8d2021-07-02 15:08:32 -07001// Copyright (c) Facebook, Inc. and its affiliates.
2// All rights reserved.
3//
4// Copyright 2019 Google LLC
5//
6// This source code is licensed under the BSD-style license found in the
7// LICENSE file in the root directory of this source tree.
8//
9// Auto-generated file. Do not edit!
10// Specification: test/qu8-igemm-minmax-fp32.yaml
11// Generator: tools/generate-gemm-test.py
12
13
14#include <gtest/gtest.h>
15
16#include <xnnpack/common.h>
17#include <xnnpack/isa-checks.h>
18
19#include <xnnpack/gemm.h>
20#include <xnnpack/igemm.h>
21#include <xnnpack/ppmm.h>
22#include "gemm-microkernel-tester.h"
23
24
Marat Dukhan69c8a292021-07-14 19:34:56 -070025#if XNN_ARCH_ARM || XNN_ARCH_ARM64
26 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8) {
27 TEST_REQUIRES_ARM_NEON;
28 GemmMicrokernelTester()
29 .mr(1)
30 .nr(16)
31 .kr(1)
32 .sr(1)
33 .m(1)
34 .n(16)
35 .k(8)
36 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37 }
38
39 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, strided_cn) {
40 TEST_REQUIRES_ARM_NEON;
41 GemmMicrokernelTester()
42 .mr(1)
43 .nr(16)
44 .kr(1)
45 .sr(1)
46 .m(1)
47 .n(16)
48 .k(8)
49 .cn_stride(19)
50 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
51 }
52
53 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8_subtile) {
54 TEST_REQUIRES_ARM_NEON;
55 for (uint32_t m = 1; m <= 1; m++) {
56 for (uint32_t n = 1; n <= 16; n++) {
57 GemmMicrokernelTester()
58 .mr(1)
59 .nr(16)
60 .kr(1)
61 .sr(1)
62 .m(m)
63 .n(n)
64 .k(8)
65 .iterations(1)
66 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
67 }
68 }
69 }
70
71 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
72 TEST_REQUIRES_ARM_NEON;
73 for (uint32_t m = 1; m <= 1; m++) {
74 GemmMicrokernelTester()
75 .mr(1)
76 .nr(16)
77 .kr(1)
78 .sr(1)
79 .m(m)
80 .n(16)
81 .k(8)
82 .iterations(1)
83 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
84 }
85 }
86
87 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
88 TEST_REQUIRES_ARM_NEON;
89 for (uint32_t n = 1; n <= 16; n++) {
90 GemmMicrokernelTester()
91 .mr(1)
92 .nr(16)
93 .kr(1)
94 .sr(1)
95 .m(1)
96 .n(n)
97 .k(8)
98 .iterations(1)
99 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
100 }
101 }
102
103 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_lt_8) {
104 TEST_REQUIRES_ARM_NEON;
105 for (size_t k = 1; k < 8; k++) {
106 GemmMicrokernelTester()
107 .mr(1)
108 .nr(16)
109 .kr(1)
110 .sr(1)
111 .m(1)
112 .n(16)
113 .k(k)
114 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
115 }
116 }
117
118 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_lt_8_subtile) {
119 TEST_REQUIRES_ARM_NEON;
120 for (size_t k = 1; k < 8; k++) {
121 for (uint32_t m = 1; m <= 1; m++) {
122 for (uint32_t n = 1; n <= 16; n++) {
123 GemmMicrokernelTester()
124 .mr(1)
125 .nr(16)
126 .kr(1)
127 .sr(1)
128 .m(m)
129 .n(n)
130 .k(k)
131 .iterations(1)
132 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
133 }
134 }
135 }
136 }
137
138 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_gt_8) {
139 TEST_REQUIRES_ARM_NEON;
140 for (size_t k = 9; k < 16; k++) {
141 GemmMicrokernelTester()
142 .mr(1)
143 .nr(16)
144 .kr(1)
145 .sr(1)
146 .m(1)
147 .n(16)
148 .k(k)
149 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
150 }
151 }
152
153 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_gt_8_subtile) {
154 TEST_REQUIRES_ARM_NEON;
155 for (size_t k = 9; k < 16; k++) {
156 for (uint32_t m = 1; m <= 1; m++) {
157 for (uint32_t n = 1; n <= 16; n++) {
158 GemmMicrokernelTester()
159 .mr(1)
160 .nr(16)
161 .kr(1)
162 .sr(1)
163 .m(m)
164 .n(n)
165 .k(k)
166 .iterations(1)
167 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
168 }
169 }
170 }
171 }
172
173 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_div_8) {
174 TEST_REQUIRES_ARM_NEON;
175 for (size_t k = 16; k <= 80; k += 8) {
176 GemmMicrokernelTester()
177 .mr(1)
178 .nr(16)
179 .kr(1)
180 .sr(1)
181 .m(1)
182 .n(16)
183 .k(k)
184 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
185 }
186 }
187
188 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_div_8_subtile) {
189 TEST_REQUIRES_ARM_NEON;
190 for (size_t k = 16; k <= 80; k += 8) {
191 for (uint32_t m = 1; m <= 1; m++) {
192 for (uint32_t n = 1; n <= 16; n++) {
193 GemmMicrokernelTester()
194 .mr(1)
195 .nr(16)
196 .kr(1)
197 .sr(1)
198 .m(m)
199 .n(n)
200 .k(k)
201 .iterations(1)
202 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
203 }
204 }
205 }
206 }
207
208 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16) {
209 TEST_REQUIRES_ARM_NEON;
210 for (uint32_t n = 17; n < 32; n++) {
211 for (size_t k = 1; k <= 40; k += 9) {
212 GemmMicrokernelTester()
213 .mr(1)
214 .nr(16)
215 .kr(1)
216 .sr(1)
217 .m(1)
218 .n(16)
219 .k(k)
220 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
221 }
222 }
223 }
224
225 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
226 TEST_REQUIRES_ARM_NEON;
227 for (uint32_t n = 17; n < 32; n++) {
228 for (size_t k = 1; k <= 40; k += 9) {
229 GemmMicrokernelTester()
230 .mr(1)
231 .nr(16)
232 .kr(1)
233 .sr(1)
234 .m(1)
235 .n(16)
236 .k(k)
237 .cn_stride(19)
238 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
239 }
240 }
241 }
242
243 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16_subtile) {
244 TEST_REQUIRES_ARM_NEON;
245 for (uint32_t n = 17; n < 32; n++) {
246 for (size_t k = 1; k <= 40; k += 9) {
247 for (uint32_t m = 1; m <= 1; m++) {
248 GemmMicrokernelTester()
249 .mr(1)
250 .nr(16)
251 .kr(1)
252 .sr(1)
253 .m(m)
254 .n(n)
255 .k(k)
256 .iterations(1)
257 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
258 }
259 }
260 }
261 }
262
263 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16) {
264 TEST_REQUIRES_ARM_NEON;
265 for (uint32_t n = 32; n <= 48; n += 16) {
266 for (size_t k = 1; k <= 40; k += 9) {
267 GemmMicrokernelTester()
268 .mr(1)
269 .nr(16)
270 .kr(1)
271 .sr(1)
272 .m(1)
273 .n(16)
274 .k(k)
275 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
276 }
277 }
278 }
279
280 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
281 TEST_REQUIRES_ARM_NEON;
282 for (uint32_t n = 32; n <= 48; n += 16) {
283 for (size_t k = 1; k <= 40; k += 9) {
284 GemmMicrokernelTester()
285 .mr(1)
286 .nr(16)
287 .kr(1)
288 .sr(1)
289 .m(1)
290 .n(n)
291 .k(k)
292 .cn_stride(19)
293 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
294 }
295 }
296 }
297
298 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16_subtile) {
299 TEST_REQUIRES_ARM_NEON;
300 for (uint32_t n = 32; n <= 48; n += 16) {
301 for (size_t k = 1; k <= 40; k += 9) {
302 for (uint32_t m = 1; m <= 1; m++) {
303 GemmMicrokernelTester()
304 .mr(1)
305 .nr(16)
306 .kr(1)
307 .sr(1)
308 .m(m)
309 .n(n)
310 .k(k)
311 .iterations(1)
312 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
313 }
314 }
315 }
316 }
317
318 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, small_kernel) {
319 TEST_REQUIRES_ARM_NEON;
320 for (size_t k = 1; k <= 40; k += 9) {
321 GemmMicrokernelTester()
322 .mr(1)
323 .nr(16)
324 .kr(1)
325 .sr(1)
326 .m(1)
327 .n(16)
328 .k(k)
329 .ks(3)
330 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
331 }
332 }
333
334 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, small_kernel_subtile) {
335 TEST_REQUIRES_ARM_NEON;
336 for (size_t k = 1; k <= 40; k += 9) {
337 for (uint32_t m = 1; m <= 1; m++) {
338 for (uint32_t n = 1; n <= 16; n++) {
339 GemmMicrokernelTester()
340 .mr(1)
341 .nr(16)
342 .kr(1)
343 .sr(1)
344 .m(m)
345 .n(n)
346 .k(k)
347 .ks(3)
348 .iterations(1)
349 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
350 }
351 }
352 }
353 }
354
355 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
356 TEST_REQUIRES_ARM_NEON;
357 for (uint32_t n = 17; n < 32; n++) {
358 for (size_t k = 1; k <= 40; k += 9) {
359 GemmMicrokernelTester()
360 .mr(1)
361 .nr(16)
362 .kr(1)
363 .sr(1)
364 .m(1)
365 .n(16)
366 .k(k)
367 .ks(3)
368 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
369 }
370 }
371 }
372
373 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
374 TEST_REQUIRES_ARM_NEON;
375 for (uint32_t n = 32; n <= 48; n += 16) {
376 for (size_t k = 1; k <= 40; k += 9) {
377 GemmMicrokernelTester()
378 .mr(1)
379 .nr(16)
380 .kr(1)
381 .sr(1)
382 .m(1)
383 .n(16)
384 .k(k)
385 .ks(3)
386 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
387 }
388 }
389 }
390
391 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, strided_cm_subtile) {
392 TEST_REQUIRES_ARM_NEON;
393 for (size_t k = 1; k <= 40; k += 9) {
394 for (uint32_t m = 1; m <= 1; m++) {
395 for (uint32_t n = 1; n <= 16; n++) {
396 GemmMicrokernelTester()
397 .mr(1)
398 .nr(16)
399 .kr(1)
400 .sr(1)
401 .m(m)
402 .n(n)
403 .k(k)
404 .cm_stride(19)
405 .iterations(1)
406 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
407 }
408 }
409 }
410 }
411
412 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, a_offset) {
413 TEST_REQUIRES_ARM_NEON;
414 for (size_t k = 1; k <= 40; k += 9) {
415 GemmMicrokernelTester()
416 .mr(1)
417 .nr(16)
418 .kr(1)
419 .sr(1)
420 .m(1)
421 .n(16)
422 .k(k)
423 .ks(3)
424 .a_offset(43)
425 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
426 }
427 }
428
429 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, zero) {
430 TEST_REQUIRES_ARM_NEON;
431 for (uint32_t mz = 0; mz < 1; mz++) {
432 for (size_t k = 1; k <= 40; k += 9) {
433 GemmMicrokernelTester()
434 .mr(1)
435 .nr(16)
436 .kr(1)
437 .sr(1)
438 .m(1)
439 .n(16)
440 .k(k)
441 .ks(3)
442 .a_offset(43)
443 .zero_index(mz)
444 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
445 }
446 }
447 }
448
449 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, qmin) {
450 TEST_REQUIRES_ARM_NEON;
451 GemmMicrokernelTester()
452 .mr(1)
453 .nr(16)
454 .kr(1)
455 .sr(1)
456 .m(1)
457 .n(16)
458 .k(8)
459 .qmin(128)
460 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
461 }
462
463 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, qmax) {
464 TEST_REQUIRES_ARM_NEON;
465 GemmMicrokernelTester()
466 .mr(1)
467 .nr(16)
468 .kr(1)
469 .sr(1)
470 .m(1)
471 .n(16)
472 .k(8)
473 .qmax(128)
474 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
475 }
476
477 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, strided_cm) {
478 TEST_REQUIRES_ARM_NEON;
479 GemmMicrokernelTester()
480 .mr(1)
481 .nr(16)
482 .kr(1)
483 .sr(1)
484 .m(1)
485 .n(16)
486 .k(8)
487 .cm_stride(19)
488 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
489 }
490
491 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, no_a_zero_point) {
492 TEST_REQUIRES_ARM_NEON;
493 for (size_t k = 1; k <= 40; k += 9) {
494 GemmMicrokernelTester()
495 .mr(1)
496 .nr(16)
497 .kr(1)
498 .sr(1)
499 .m(1)
500 .n(16)
501 .k(k)
502 .a_zero_point(0)
503 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
504 }
505 }
506
507 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, no_b_zero_point) {
508 TEST_REQUIRES_ARM_NEON;
509 for (size_t k = 1; k <= 40; k += 9) {
510 GemmMicrokernelTester()
511 .mr(1)
512 .nr(16)
513 .kr(1)
514 .sr(1)
515 .m(1)
516 .n(16)
517 .k(k)
518 .b_zero_point(0)
519 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
520 }
521 }
522
523 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, no_zero_point) {
524 TEST_REQUIRES_ARM_NEON;
525 for (size_t k = 1; k <= 40; k += 9) {
526 GemmMicrokernelTester()
527 .mr(1)
528 .nr(16)
529 .kr(1)
530 .sr(1)
531 .m(1)
532 .n(16)
533 .k(k)
534 .a_zero_point(0)
535 .b_zero_point(0)
536 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
537 }
538 }
539#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
540
541
542#if XNN_ARCH_ARM || XNN_ARCH_ARM64
543 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8) {
544 TEST_REQUIRES_ARM_NEON;
545 GemmMicrokernelTester()
546 .mr(4)
547 .nr(16)
548 .kr(1)
549 .sr(1)
550 .m(4)
551 .n(16)
552 .k(8)
553 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
554 }
555
556 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cn) {
557 TEST_REQUIRES_ARM_NEON;
558 GemmMicrokernelTester()
559 .mr(4)
560 .nr(16)
561 .kr(1)
562 .sr(1)
563 .m(4)
564 .n(16)
565 .k(8)
566 .cn_stride(19)
567 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
568 }
569
570 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile) {
571 TEST_REQUIRES_ARM_NEON;
572 for (uint32_t m = 1; m <= 4; m++) {
573 for (uint32_t n = 1; n <= 16; n++) {
574 GemmMicrokernelTester()
575 .mr(4)
576 .nr(16)
577 .kr(1)
578 .sr(1)
579 .m(m)
580 .n(n)
581 .k(8)
582 .iterations(1)
583 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
584 }
585 }
586 }
587
588 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
589 TEST_REQUIRES_ARM_NEON;
590 for (uint32_t m = 1; m <= 4; m++) {
591 GemmMicrokernelTester()
592 .mr(4)
593 .nr(16)
594 .kr(1)
595 .sr(1)
596 .m(m)
597 .n(16)
598 .k(8)
599 .iterations(1)
600 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
601 }
602 }
603
604 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
605 TEST_REQUIRES_ARM_NEON;
606 for (uint32_t n = 1; n <= 16; n++) {
607 GemmMicrokernelTester()
608 .mr(4)
609 .nr(16)
610 .kr(1)
611 .sr(1)
612 .m(4)
613 .n(n)
614 .k(8)
615 .iterations(1)
616 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
617 }
618 }
619
620 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_lt_8) {
621 TEST_REQUIRES_ARM_NEON;
622 for (size_t k = 1; k < 8; k++) {
623 GemmMicrokernelTester()
624 .mr(4)
625 .nr(16)
626 .kr(1)
627 .sr(1)
628 .m(4)
629 .n(16)
630 .k(k)
631 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
632 }
633 }
634
635 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_lt_8_subtile) {
636 TEST_REQUIRES_ARM_NEON;
637 for (size_t k = 1; k < 8; k++) {
638 for (uint32_t m = 1; m <= 4; m++) {
639 for (uint32_t n = 1; n <= 16; n++) {
640 GemmMicrokernelTester()
641 .mr(4)
642 .nr(16)
643 .kr(1)
644 .sr(1)
645 .m(m)
646 .n(n)
647 .k(k)
648 .iterations(1)
649 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
650 }
651 }
652 }
653 }
654
655 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_gt_8) {
656 TEST_REQUIRES_ARM_NEON;
657 for (size_t k = 9; k < 16; k++) {
658 GemmMicrokernelTester()
659 .mr(4)
660 .nr(16)
661 .kr(1)
662 .sr(1)
663 .m(4)
664 .n(16)
665 .k(k)
666 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
667 }
668 }
669
670 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_gt_8_subtile) {
671 TEST_REQUIRES_ARM_NEON;
672 for (size_t k = 9; k < 16; k++) {
673 for (uint32_t m = 1; m <= 4; m++) {
674 for (uint32_t n = 1; n <= 16; n++) {
675 GemmMicrokernelTester()
676 .mr(4)
677 .nr(16)
678 .kr(1)
679 .sr(1)
680 .m(m)
681 .n(n)
682 .k(k)
683 .iterations(1)
684 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
685 }
686 }
687 }
688 }
689
690 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_div_8) {
691 TEST_REQUIRES_ARM_NEON;
692 for (size_t k = 16; k <= 80; k += 8) {
693 GemmMicrokernelTester()
694 .mr(4)
695 .nr(16)
696 .kr(1)
697 .sr(1)
698 .m(4)
699 .n(16)
700 .k(k)
701 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
702 }
703 }
704
705 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_div_8_subtile) {
706 TEST_REQUIRES_ARM_NEON;
707 for (size_t k = 16; k <= 80; k += 8) {
708 for (uint32_t m = 1; m <= 4; m++) {
709 for (uint32_t n = 1; n <= 16; n++) {
710 GemmMicrokernelTester()
711 .mr(4)
712 .nr(16)
713 .kr(1)
714 .sr(1)
715 .m(m)
716 .n(n)
717 .k(k)
718 .iterations(1)
719 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
720 }
721 }
722 }
723 }
724
725 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16) {
726 TEST_REQUIRES_ARM_NEON;
727 for (uint32_t n = 17; n < 32; n++) {
728 for (size_t k = 1; k <= 40; k += 9) {
729 GemmMicrokernelTester()
730 .mr(4)
731 .nr(16)
732 .kr(1)
733 .sr(1)
734 .m(4)
735 .n(16)
736 .k(k)
737 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
738 }
739 }
740 }
741
742 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
743 TEST_REQUIRES_ARM_NEON;
744 for (uint32_t n = 17; n < 32; n++) {
745 for (size_t k = 1; k <= 40; k += 9) {
746 GemmMicrokernelTester()
747 .mr(4)
748 .nr(16)
749 .kr(1)
750 .sr(1)
751 .m(4)
752 .n(16)
753 .k(k)
754 .cn_stride(19)
755 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
756 }
757 }
758 }
759
760 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_subtile) {
761 TEST_REQUIRES_ARM_NEON;
762 for (uint32_t n = 17; n < 32; n++) {
763 for (size_t k = 1; k <= 40; k += 9) {
764 for (uint32_t m = 1; m <= 4; m++) {
765 GemmMicrokernelTester()
766 .mr(4)
767 .nr(16)
768 .kr(1)
769 .sr(1)
770 .m(m)
771 .n(n)
772 .k(k)
773 .iterations(1)
774 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
775 }
776 }
777 }
778 }
779
780 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16) {
781 TEST_REQUIRES_ARM_NEON;
782 for (uint32_t n = 32; n <= 48; n += 16) {
783 for (size_t k = 1; k <= 40; k += 9) {
784 GemmMicrokernelTester()
785 .mr(4)
786 .nr(16)
787 .kr(1)
788 .sr(1)
789 .m(4)
790 .n(16)
791 .k(k)
792 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
793 }
794 }
795 }
796
797 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
798 TEST_REQUIRES_ARM_NEON;
799 for (uint32_t n = 32; n <= 48; n += 16) {
800 for (size_t k = 1; k <= 40; k += 9) {
801 GemmMicrokernelTester()
802 .mr(4)
803 .nr(16)
804 .kr(1)
805 .sr(1)
806 .m(4)
807 .n(n)
808 .k(k)
809 .cn_stride(19)
810 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
811 }
812 }
813 }
814
815 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_subtile) {
816 TEST_REQUIRES_ARM_NEON;
817 for (uint32_t n = 32; n <= 48; n += 16) {
818 for (size_t k = 1; k <= 40; k += 9) {
819 for (uint32_t m = 1; m <= 4; m++) {
820 GemmMicrokernelTester()
821 .mr(4)
822 .nr(16)
823 .kr(1)
824 .sr(1)
825 .m(m)
826 .n(n)
827 .k(k)
828 .iterations(1)
829 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
830 }
831 }
832 }
833 }
834
835 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, small_kernel) {
836 TEST_REQUIRES_ARM_NEON;
837 for (size_t k = 1; k <= 40; k += 9) {
838 GemmMicrokernelTester()
839 .mr(4)
840 .nr(16)
841 .kr(1)
842 .sr(1)
843 .m(4)
844 .n(16)
845 .k(k)
846 .ks(3)
847 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
848 }
849 }
850
851 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, small_kernel_subtile) {
852 TEST_REQUIRES_ARM_NEON;
853 for (size_t k = 1; k <= 40; k += 9) {
854 for (uint32_t m = 1; m <= 4; m++) {
855 for (uint32_t n = 1; n <= 16; n++) {
856 GemmMicrokernelTester()
857 .mr(4)
858 .nr(16)
859 .kr(1)
860 .sr(1)
861 .m(m)
862 .n(n)
863 .k(k)
864 .ks(3)
865 .iterations(1)
866 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
867 }
868 }
869 }
870 }
871
872 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
873 TEST_REQUIRES_ARM_NEON;
874 for (uint32_t n = 17; n < 32; n++) {
875 for (size_t k = 1; k <= 40; k += 9) {
876 GemmMicrokernelTester()
877 .mr(4)
878 .nr(16)
879 .kr(1)
880 .sr(1)
881 .m(4)
882 .n(16)
883 .k(k)
884 .ks(3)
885 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
886 }
887 }
888 }
889
890 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
891 TEST_REQUIRES_ARM_NEON;
892 for (uint32_t n = 32; n <= 48; n += 16) {
893 for (size_t k = 1; k <= 40; k += 9) {
894 GemmMicrokernelTester()
895 .mr(4)
896 .nr(16)
897 .kr(1)
898 .sr(1)
899 .m(4)
900 .n(16)
901 .k(k)
902 .ks(3)
903 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
904 }
905 }
906 }
907
908 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cm_subtile) {
909 TEST_REQUIRES_ARM_NEON;
910 for (size_t k = 1; k <= 40; k += 9) {
911 for (uint32_t m = 1; m <= 4; m++) {
912 for (uint32_t n = 1; n <= 16; n++) {
913 GemmMicrokernelTester()
914 .mr(4)
915 .nr(16)
916 .kr(1)
917 .sr(1)
918 .m(m)
919 .n(n)
920 .k(k)
921 .cm_stride(19)
922 .iterations(1)
923 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
924 }
925 }
926 }
927 }
928
929 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, a_offset) {
930 TEST_REQUIRES_ARM_NEON;
931 for (size_t k = 1; k <= 40; k += 9) {
932 GemmMicrokernelTester()
933 .mr(4)
934 .nr(16)
935 .kr(1)
936 .sr(1)
937 .m(4)
938 .n(16)
939 .k(k)
940 .ks(3)
941 .a_offset(163)
942 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
943 }
944 }
945
946 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, zero) {
947 TEST_REQUIRES_ARM_NEON;
948 for (uint32_t mz = 0; mz < 4; mz++) {
949 for (size_t k = 1; k <= 40; k += 9) {
950 GemmMicrokernelTester()
951 .mr(4)
952 .nr(16)
953 .kr(1)
954 .sr(1)
955 .m(4)
956 .n(16)
957 .k(k)
958 .ks(3)
959 .a_offset(163)
960 .zero_index(mz)
961 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
962 }
963 }
964 }
965
966 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, qmin) {
967 TEST_REQUIRES_ARM_NEON;
968 GemmMicrokernelTester()
969 .mr(4)
970 .nr(16)
971 .kr(1)
972 .sr(1)
973 .m(4)
974 .n(16)
975 .k(8)
976 .qmin(128)
977 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
978 }
979
980 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, qmax) {
981 TEST_REQUIRES_ARM_NEON;
982 GemmMicrokernelTester()
983 .mr(4)
984 .nr(16)
985 .kr(1)
986 .sr(1)
987 .m(4)
988 .n(16)
989 .k(8)
990 .qmax(128)
991 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
992 }
993
994 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cm) {
995 TEST_REQUIRES_ARM_NEON;
996 GemmMicrokernelTester()
997 .mr(4)
998 .nr(16)
999 .kr(1)
1000 .sr(1)
1001 .m(4)
1002 .n(16)
1003 .k(8)
1004 .cm_stride(19)
1005 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1006 }
1007
1008 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, no_a_zero_point) {
1009 TEST_REQUIRES_ARM_NEON;
1010 for (size_t k = 1; k <= 40; k += 9) {
1011 GemmMicrokernelTester()
1012 .mr(4)
1013 .nr(16)
1014 .kr(1)
1015 .sr(1)
1016 .m(4)
1017 .n(16)
1018 .k(k)
1019 .a_zero_point(0)
1020 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1021 }
1022 }
1023
1024 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, no_b_zero_point) {
1025 TEST_REQUIRES_ARM_NEON;
1026 for (size_t k = 1; k <= 40; k += 9) {
1027 GemmMicrokernelTester()
1028 .mr(4)
1029 .nr(16)
1030 .kr(1)
1031 .sr(1)
1032 .m(4)
1033 .n(16)
1034 .k(k)
1035 .b_zero_point(0)
1036 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1037 }
1038 }
1039
1040 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, no_zero_point) {
1041 TEST_REQUIRES_ARM_NEON;
1042 for (size_t k = 1; k <= 40; k += 9) {
1043 GemmMicrokernelTester()
1044 .mr(4)
1045 .nr(16)
1046 .kr(1)
1047 .sr(1)
1048 .m(4)
1049 .n(16)
1050 .k(k)
1051 .a_zero_point(0)
1052 .b_zero_point(0)
1053 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1054 }
1055 }
1056#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1057
1058
1059#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1060 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8) {
1061 TEST_REQUIRES_ARM_NEON_V8;
1062 GemmMicrokernelTester()
1063 .mr(1)
1064 .nr(16)
1065 .kr(1)
1066 .sr(1)
1067 .m(1)
1068 .n(16)
1069 .k(8)
1070 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1071 }
1072
1073 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, strided_cn) {
1074 TEST_REQUIRES_ARM_NEON_V8;
1075 GemmMicrokernelTester()
1076 .mr(1)
1077 .nr(16)
1078 .kr(1)
1079 .sr(1)
1080 .m(1)
1081 .n(16)
1082 .k(8)
1083 .cn_stride(19)
1084 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1085 }
1086
1087 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8_subtile) {
1088 TEST_REQUIRES_ARM_NEON_V8;
1089 for (uint32_t m = 1; m <= 1; m++) {
1090 for (uint32_t n = 1; n <= 16; n++) {
1091 GemmMicrokernelTester()
1092 .mr(1)
1093 .nr(16)
1094 .kr(1)
1095 .sr(1)
1096 .m(m)
1097 .n(n)
1098 .k(8)
1099 .iterations(1)
1100 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1101 }
1102 }
1103 }
1104
1105 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
1106 TEST_REQUIRES_ARM_NEON_V8;
1107 for (uint32_t m = 1; m <= 1; m++) {
1108 GemmMicrokernelTester()
1109 .mr(1)
1110 .nr(16)
1111 .kr(1)
1112 .sr(1)
1113 .m(m)
1114 .n(16)
1115 .k(8)
1116 .iterations(1)
1117 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1118 }
1119 }
1120
1121 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
1122 TEST_REQUIRES_ARM_NEON_V8;
1123 for (uint32_t n = 1; n <= 16; n++) {
1124 GemmMicrokernelTester()
1125 .mr(1)
1126 .nr(16)
1127 .kr(1)
1128 .sr(1)
1129 .m(1)
1130 .n(n)
1131 .k(8)
1132 .iterations(1)
1133 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1134 }
1135 }
1136
1137 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_lt_8) {
1138 TEST_REQUIRES_ARM_NEON_V8;
1139 for (size_t k = 1; k < 8; k++) {
1140 GemmMicrokernelTester()
1141 .mr(1)
1142 .nr(16)
1143 .kr(1)
1144 .sr(1)
1145 .m(1)
1146 .n(16)
1147 .k(k)
1148 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1149 }
1150 }
1151
1152 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_lt_8_subtile) {
1153 TEST_REQUIRES_ARM_NEON_V8;
1154 for (size_t k = 1; k < 8; k++) {
1155 for (uint32_t m = 1; m <= 1; m++) {
1156 for (uint32_t n = 1; n <= 16; n++) {
1157 GemmMicrokernelTester()
1158 .mr(1)
1159 .nr(16)
1160 .kr(1)
1161 .sr(1)
1162 .m(m)
1163 .n(n)
1164 .k(k)
1165 .iterations(1)
1166 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1167 }
1168 }
1169 }
1170 }
1171
1172 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_gt_8) {
1173 TEST_REQUIRES_ARM_NEON_V8;
1174 for (size_t k = 9; k < 16; k++) {
1175 GemmMicrokernelTester()
1176 .mr(1)
1177 .nr(16)
1178 .kr(1)
1179 .sr(1)
1180 .m(1)
1181 .n(16)
1182 .k(k)
1183 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1184 }
1185 }
1186
1187 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_gt_8_subtile) {
1188 TEST_REQUIRES_ARM_NEON_V8;
1189 for (size_t k = 9; k < 16; k++) {
1190 for (uint32_t m = 1; m <= 1; m++) {
1191 for (uint32_t n = 1; n <= 16; n++) {
1192 GemmMicrokernelTester()
1193 .mr(1)
1194 .nr(16)
1195 .kr(1)
1196 .sr(1)
1197 .m(m)
1198 .n(n)
1199 .k(k)
1200 .iterations(1)
1201 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1202 }
1203 }
1204 }
1205 }
1206
1207 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_div_8) {
1208 TEST_REQUIRES_ARM_NEON_V8;
1209 for (size_t k = 16; k <= 80; k += 8) {
1210 GemmMicrokernelTester()
1211 .mr(1)
1212 .nr(16)
1213 .kr(1)
1214 .sr(1)
1215 .m(1)
1216 .n(16)
1217 .k(k)
1218 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1219 }
1220 }
1221
1222 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_div_8_subtile) {
1223 TEST_REQUIRES_ARM_NEON_V8;
1224 for (size_t k = 16; k <= 80; k += 8) {
1225 for (uint32_t m = 1; m <= 1; m++) {
1226 for (uint32_t n = 1; n <= 16; n++) {
1227 GemmMicrokernelTester()
1228 .mr(1)
1229 .nr(16)
1230 .kr(1)
1231 .sr(1)
1232 .m(m)
1233 .n(n)
1234 .k(k)
1235 .iterations(1)
1236 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1237 }
1238 }
1239 }
1240 }
1241
1242 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16) {
1243 TEST_REQUIRES_ARM_NEON_V8;
1244 for (uint32_t n = 17; n < 32; n++) {
1245 for (size_t k = 1; k <= 40; k += 9) {
1246 GemmMicrokernelTester()
1247 .mr(1)
1248 .nr(16)
1249 .kr(1)
1250 .sr(1)
1251 .m(1)
1252 .n(16)
1253 .k(k)
1254 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1255 }
1256 }
1257 }
1258
1259 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16_strided_cn) {
1260 TEST_REQUIRES_ARM_NEON_V8;
1261 for (uint32_t n = 17; n < 32; n++) {
1262 for (size_t k = 1; k <= 40; k += 9) {
1263 GemmMicrokernelTester()
1264 .mr(1)
1265 .nr(16)
1266 .kr(1)
1267 .sr(1)
1268 .m(1)
1269 .n(16)
1270 .k(k)
1271 .cn_stride(19)
1272 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1273 }
1274 }
1275 }
1276
1277 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16_subtile) {
1278 TEST_REQUIRES_ARM_NEON_V8;
1279 for (uint32_t n = 17; n < 32; n++) {
1280 for (size_t k = 1; k <= 40; k += 9) {
1281 for (uint32_t m = 1; m <= 1; m++) {
1282 GemmMicrokernelTester()
1283 .mr(1)
1284 .nr(16)
1285 .kr(1)
1286 .sr(1)
1287 .m(m)
1288 .n(n)
1289 .k(k)
1290 .iterations(1)
1291 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1292 }
1293 }
1294 }
1295 }
1296
1297 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16) {
1298 TEST_REQUIRES_ARM_NEON_V8;
1299 for (uint32_t n = 32; n <= 48; n += 16) {
1300 for (size_t k = 1; k <= 40; k += 9) {
1301 GemmMicrokernelTester()
1302 .mr(1)
1303 .nr(16)
1304 .kr(1)
1305 .sr(1)
1306 .m(1)
1307 .n(16)
1308 .k(k)
1309 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1310 }
1311 }
1312 }
1313
1314 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16_strided_cn) {
1315 TEST_REQUIRES_ARM_NEON_V8;
1316 for (uint32_t n = 32; n <= 48; n += 16) {
1317 for (size_t k = 1; k <= 40; k += 9) {
1318 GemmMicrokernelTester()
1319 .mr(1)
1320 .nr(16)
1321 .kr(1)
1322 .sr(1)
1323 .m(1)
1324 .n(n)
1325 .k(k)
1326 .cn_stride(19)
1327 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1328 }
1329 }
1330 }
1331
1332 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16_subtile) {
1333 TEST_REQUIRES_ARM_NEON_V8;
1334 for (uint32_t n = 32; n <= 48; n += 16) {
1335 for (size_t k = 1; k <= 40; k += 9) {
1336 for (uint32_t m = 1; m <= 1; m++) {
1337 GemmMicrokernelTester()
1338 .mr(1)
1339 .nr(16)
1340 .kr(1)
1341 .sr(1)
1342 .m(m)
1343 .n(n)
1344 .k(k)
1345 .iterations(1)
1346 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1347 }
1348 }
1349 }
1350 }
1351
1352 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, small_kernel) {
1353 TEST_REQUIRES_ARM_NEON_V8;
1354 for (size_t k = 1; k <= 40; k += 9) {
1355 GemmMicrokernelTester()
1356 .mr(1)
1357 .nr(16)
1358 .kr(1)
1359 .sr(1)
1360 .m(1)
1361 .n(16)
1362 .k(k)
1363 .ks(3)
1364 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1365 }
1366 }
1367
1368 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, small_kernel_subtile) {
1369 TEST_REQUIRES_ARM_NEON_V8;
1370 for (size_t k = 1; k <= 40; k += 9) {
1371 for (uint32_t m = 1; m <= 1; m++) {
1372 for (uint32_t n = 1; n <= 16; n++) {
1373 GemmMicrokernelTester()
1374 .mr(1)
1375 .nr(16)
1376 .kr(1)
1377 .sr(1)
1378 .m(m)
1379 .n(n)
1380 .k(k)
1381 .ks(3)
1382 .iterations(1)
1383 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1384 }
1385 }
1386 }
1387 }
1388
1389 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16_small_kernel) {
1390 TEST_REQUIRES_ARM_NEON_V8;
1391 for (uint32_t n = 17; n < 32; n++) {
1392 for (size_t k = 1; k <= 40; k += 9) {
1393 GemmMicrokernelTester()
1394 .mr(1)
1395 .nr(16)
1396 .kr(1)
1397 .sr(1)
1398 .m(1)
1399 .n(16)
1400 .k(k)
1401 .ks(3)
1402 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1403 }
1404 }
1405 }
1406
1407 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16_small_kernel) {
1408 TEST_REQUIRES_ARM_NEON_V8;
1409 for (uint32_t n = 32; n <= 48; n += 16) {
1410 for (size_t k = 1; k <= 40; k += 9) {
1411 GemmMicrokernelTester()
1412 .mr(1)
1413 .nr(16)
1414 .kr(1)
1415 .sr(1)
1416 .m(1)
1417 .n(16)
1418 .k(k)
1419 .ks(3)
1420 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1421 }
1422 }
1423 }
1424
1425 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, strided_cm_subtile) {
1426 TEST_REQUIRES_ARM_NEON_V8;
1427 for (size_t k = 1; k <= 40; k += 9) {
1428 for (uint32_t m = 1; m <= 1; m++) {
1429 for (uint32_t n = 1; n <= 16; n++) {
1430 GemmMicrokernelTester()
1431 .mr(1)
1432 .nr(16)
1433 .kr(1)
1434 .sr(1)
1435 .m(m)
1436 .n(n)
1437 .k(k)
1438 .cm_stride(19)
1439 .iterations(1)
1440 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1441 }
1442 }
1443 }
1444 }
1445
1446 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, a_offset) {
1447 TEST_REQUIRES_ARM_NEON_V8;
1448 for (size_t k = 1; k <= 40; k += 9) {
1449 GemmMicrokernelTester()
1450 .mr(1)
1451 .nr(16)
1452 .kr(1)
1453 .sr(1)
1454 .m(1)
1455 .n(16)
1456 .k(k)
1457 .ks(3)
1458 .a_offset(43)
1459 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1460 }
1461 }
1462
1463 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, zero) {
1464 TEST_REQUIRES_ARM_NEON_V8;
1465 for (uint32_t mz = 0; mz < 1; mz++) {
1466 for (size_t k = 1; k <= 40; k += 9) {
1467 GemmMicrokernelTester()
1468 .mr(1)
1469 .nr(16)
1470 .kr(1)
1471 .sr(1)
1472 .m(1)
1473 .n(16)
1474 .k(k)
1475 .ks(3)
1476 .a_offset(43)
1477 .zero_index(mz)
1478 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1479 }
1480 }
1481 }
1482
1483 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, qmin) {
1484 TEST_REQUIRES_ARM_NEON_V8;
1485 GemmMicrokernelTester()
1486 .mr(1)
1487 .nr(16)
1488 .kr(1)
1489 .sr(1)
1490 .m(1)
1491 .n(16)
1492 .k(8)
1493 .qmin(128)
1494 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1495 }
1496
1497 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, qmax) {
1498 TEST_REQUIRES_ARM_NEON_V8;
1499 GemmMicrokernelTester()
1500 .mr(1)
1501 .nr(16)
1502 .kr(1)
1503 .sr(1)
1504 .m(1)
1505 .n(16)
1506 .k(8)
1507 .qmax(128)
1508 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1509 }
1510
1511 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, strided_cm) {
1512 TEST_REQUIRES_ARM_NEON_V8;
1513 GemmMicrokernelTester()
1514 .mr(1)
1515 .nr(16)
1516 .kr(1)
1517 .sr(1)
1518 .m(1)
1519 .n(16)
1520 .k(8)
1521 .cm_stride(19)
1522 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1523 }
1524
1525 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, no_a_zero_point) {
1526 TEST_REQUIRES_ARM_NEON_V8;
1527 for (size_t k = 1; k <= 40; k += 9) {
1528 GemmMicrokernelTester()
1529 .mr(1)
1530 .nr(16)
1531 .kr(1)
1532 .sr(1)
1533 .m(1)
1534 .n(16)
1535 .k(k)
1536 .a_zero_point(0)
1537 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1538 }
1539 }
1540
1541 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, no_b_zero_point) {
1542 TEST_REQUIRES_ARM_NEON_V8;
1543 for (size_t k = 1; k <= 40; k += 9) {
1544 GemmMicrokernelTester()
1545 .mr(1)
1546 .nr(16)
1547 .kr(1)
1548 .sr(1)
1549 .m(1)
1550 .n(16)
1551 .k(k)
1552 .b_zero_point(0)
1553 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1554 }
1555 }
1556
1557 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, no_zero_point) {
1558 TEST_REQUIRES_ARM_NEON_V8;
1559 for (size_t k = 1; k <= 40; k += 9) {
1560 GemmMicrokernelTester()
1561 .mr(1)
1562 .nr(16)
1563 .kr(1)
1564 .sr(1)
1565 .m(1)
1566 .n(16)
1567 .k(k)
1568 .a_zero_point(0)
1569 .b_zero_point(0)
1570 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1571 }
1572 }
1573#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1574
1575
1576#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1577 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8) {
1578 TEST_REQUIRES_ARM_NEON_V8;
1579 GemmMicrokernelTester()
1580 .mr(4)
1581 .nr(16)
1582 .kr(1)
1583 .sr(1)
1584 .m(4)
1585 .n(16)
1586 .k(8)
1587 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1588 }
1589
1590 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, strided_cn) {
1591 TEST_REQUIRES_ARM_NEON_V8;
1592 GemmMicrokernelTester()
1593 .mr(4)
1594 .nr(16)
1595 .kr(1)
1596 .sr(1)
1597 .m(4)
1598 .n(16)
1599 .k(8)
1600 .cn_stride(19)
1601 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1602 }
1603
1604 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_subtile) {
1605 TEST_REQUIRES_ARM_NEON_V8;
1606 for (uint32_t m = 1; m <= 4; m++) {
1607 for (uint32_t n = 1; n <= 16; n++) {
1608 GemmMicrokernelTester()
1609 .mr(4)
1610 .nr(16)
1611 .kr(1)
1612 .sr(1)
1613 .m(m)
1614 .n(n)
1615 .k(8)
1616 .iterations(1)
1617 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1618 }
1619 }
1620 }
1621
1622 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
1623 TEST_REQUIRES_ARM_NEON_V8;
1624 for (uint32_t m = 1; m <= 4; m++) {
1625 GemmMicrokernelTester()
1626 .mr(4)
1627 .nr(16)
1628 .kr(1)
1629 .sr(1)
1630 .m(m)
1631 .n(16)
1632 .k(8)
1633 .iterations(1)
1634 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1635 }
1636 }
1637
1638 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
1639 TEST_REQUIRES_ARM_NEON_V8;
1640 for (uint32_t n = 1; n <= 16; n++) {
1641 GemmMicrokernelTester()
1642 .mr(4)
1643 .nr(16)
1644 .kr(1)
1645 .sr(1)
1646 .m(4)
1647 .n(n)
1648 .k(8)
1649 .iterations(1)
1650 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1651 }
1652 }
1653
1654 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_lt_8) {
1655 TEST_REQUIRES_ARM_NEON_V8;
1656 for (size_t k = 1; k < 8; k++) {
1657 GemmMicrokernelTester()
1658 .mr(4)
1659 .nr(16)
1660 .kr(1)
1661 .sr(1)
1662 .m(4)
1663 .n(16)
1664 .k(k)
1665 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1666 }
1667 }
1668
1669 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_lt_8_subtile) {
1670 TEST_REQUIRES_ARM_NEON_V8;
1671 for (size_t k = 1; k < 8; k++) {
1672 for (uint32_t m = 1; m <= 4; m++) {
1673 for (uint32_t n = 1; n <= 16; n++) {
1674 GemmMicrokernelTester()
1675 .mr(4)
1676 .nr(16)
1677 .kr(1)
1678 .sr(1)
1679 .m(m)
1680 .n(n)
1681 .k(k)
1682 .iterations(1)
1683 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1684 }
1685 }
1686 }
1687 }
1688
1689 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_gt_8) {
1690 TEST_REQUIRES_ARM_NEON_V8;
1691 for (size_t k = 9; k < 16; k++) {
1692 GemmMicrokernelTester()
1693 .mr(4)
1694 .nr(16)
1695 .kr(1)
1696 .sr(1)
1697 .m(4)
1698 .n(16)
1699 .k(k)
1700 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1701 }
1702 }
1703
1704 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_gt_8_subtile) {
1705 TEST_REQUIRES_ARM_NEON_V8;
1706 for (size_t k = 9; k < 16; k++) {
1707 for (uint32_t m = 1; m <= 4; m++) {
1708 for (uint32_t n = 1; n <= 16; n++) {
1709 GemmMicrokernelTester()
1710 .mr(4)
1711 .nr(16)
1712 .kr(1)
1713 .sr(1)
1714 .m(m)
1715 .n(n)
1716 .k(k)
1717 .iterations(1)
1718 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1719 }
1720 }
1721 }
1722 }
1723
1724 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_div_8) {
1725 TEST_REQUIRES_ARM_NEON_V8;
1726 for (size_t k = 16; k <= 80; k += 8) {
1727 GemmMicrokernelTester()
1728 .mr(4)
1729 .nr(16)
1730 .kr(1)
1731 .sr(1)
1732 .m(4)
1733 .n(16)
1734 .k(k)
1735 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1736 }
1737 }
1738
1739 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_div_8_subtile) {
1740 TEST_REQUIRES_ARM_NEON_V8;
1741 for (size_t k = 16; k <= 80; k += 8) {
1742 for (uint32_t m = 1; m <= 4; m++) {
1743 for (uint32_t n = 1; n <= 16; n++) {
1744 GemmMicrokernelTester()
1745 .mr(4)
1746 .nr(16)
1747 .kr(1)
1748 .sr(1)
1749 .m(m)
1750 .n(n)
1751 .k(k)
1752 .iterations(1)
1753 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1754 }
1755 }
1756 }
1757 }
1758
1759 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16) {
1760 TEST_REQUIRES_ARM_NEON_V8;
1761 for (uint32_t n = 17; n < 32; n++) {
1762 for (size_t k = 1; k <= 40; k += 9) {
1763 GemmMicrokernelTester()
1764 .mr(4)
1765 .nr(16)
1766 .kr(1)
1767 .sr(1)
1768 .m(4)
1769 .n(16)
1770 .k(k)
1771 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1772 }
1773 }
1774 }
1775
1776 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16_strided_cn) {
1777 TEST_REQUIRES_ARM_NEON_V8;
1778 for (uint32_t n = 17; n < 32; n++) {
1779 for (size_t k = 1; k <= 40; k += 9) {
1780 GemmMicrokernelTester()
1781 .mr(4)
1782 .nr(16)
1783 .kr(1)
1784 .sr(1)
1785 .m(4)
1786 .n(16)
1787 .k(k)
1788 .cn_stride(19)
1789 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1790 }
1791 }
1792 }
1793
1794 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16_subtile) {
1795 TEST_REQUIRES_ARM_NEON_V8;
1796 for (uint32_t n = 17; n < 32; n++) {
1797 for (size_t k = 1; k <= 40; k += 9) {
1798 for (uint32_t m = 1; m <= 4; m++) {
1799 GemmMicrokernelTester()
1800 .mr(4)
1801 .nr(16)
1802 .kr(1)
1803 .sr(1)
1804 .m(m)
1805 .n(n)
1806 .k(k)
1807 .iterations(1)
1808 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1809 }
1810 }
1811 }
1812 }
1813
1814 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16) {
1815 TEST_REQUIRES_ARM_NEON_V8;
1816 for (uint32_t n = 32; n <= 48; n += 16) {
1817 for (size_t k = 1; k <= 40; k += 9) {
1818 GemmMicrokernelTester()
1819 .mr(4)
1820 .nr(16)
1821 .kr(1)
1822 .sr(1)
1823 .m(4)
1824 .n(16)
1825 .k(k)
1826 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1827 }
1828 }
1829 }
1830
1831 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16_strided_cn) {
1832 TEST_REQUIRES_ARM_NEON_V8;
1833 for (uint32_t n = 32; n <= 48; n += 16) {
1834 for (size_t k = 1; k <= 40; k += 9) {
1835 GemmMicrokernelTester()
1836 .mr(4)
1837 .nr(16)
1838 .kr(1)
1839 .sr(1)
1840 .m(4)
1841 .n(n)
1842 .k(k)
1843 .cn_stride(19)
1844 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1845 }
1846 }
1847 }
1848
1849 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16_subtile) {
1850 TEST_REQUIRES_ARM_NEON_V8;
1851 for (uint32_t n = 32; n <= 48; n += 16) {
1852 for (size_t k = 1; k <= 40; k += 9) {
1853 for (uint32_t m = 1; m <= 4; m++) {
1854 GemmMicrokernelTester()
1855 .mr(4)
1856 .nr(16)
1857 .kr(1)
1858 .sr(1)
1859 .m(m)
1860 .n(n)
1861 .k(k)
1862 .iterations(1)
1863 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1864 }
1865 }
1866 }
1867 }
1868
1869 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, small_kernel) {
1870 TEST_REQUIRES_ARM_NEON_V8;
1871 for (size_t k = 1; k <= 40; k += 9) {
1872 GemmMicrokernelTester()
1873 .mr(4)
1874 .nr(16)
1875 .kr(1)
1876 .sr(1)
1877 .m(4)
1878 .n(16)
1879 .k(k)
1880 .ks(3)
1881 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1882 }
1883 }
1884
1885 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, small_kernel_subtile) {
1886 TEST_REQUIRES_ARM_NEON_V8;
1887 for (size_t k = 1; k <= 40; k += 9) {
1888 for (uint32_t m = 1; m <= 4; m++) {
1889 for (uint32_t n = 1; n <= 16; n++) {
1890 GemmMicrokernelTester()
1891 .mr(4)
1892 .nr(16)
1893 .kr(1)
1894 .sr(1)
1895 .m(m)
1896 .n(n)
1897 .k(k)
1898 .ks(3)
1899 .iterations(1)
1900 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1901 }
1902 }
1903 }
1904 }
1905
1906 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16_small_kernel) {
1907 TEST_REQUIRES_ARM_NEON_V8;
1908 for (uint32_t n = 17; n < 32; n++) {
1909 for (size_t k = 1; k <= 40; k += 9) {
1910 GemmMicrokernelTester()
1911 .mr(4)
1912 .nr(16)
1913 .kr(1)
1914 .sr(1)
1915 .m(4)
1916 .n(16)
1917 .k(k)
1918 .ks(3)
1919 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1920 }
1921 }
1922 }
1923
1924 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16_small_kernel) {
1925 TEST_REQUIRES_ARM_NEON_V8;
1926 for (uint32_t n = 32; n <= 48; n += 16) {
1927 for (size_t k = 1; k <= 40; k += 9) {
1928 GemmMicrokernelTester()
1929 .mr(4)
1930 .nr(16)
1931 .kr(1)
1932 .sr(1)
1933 .m(4)
1934 .n(16)
1935 .k(k)
1936 .ks(3)
1937 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1938 }
1939 }
1940 }
1941
1942 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, strided_cm_subtile) {
1943 TEST_REQUIRES_ARM_NEON_V8;
1944 for (size_t k = 1; k <= 40; k += 9) {
1945 for (uint32_t m = 1; m <= 4; m++) {
1946 for (uint32_t n = 1; n <= 16; n++) {
1947 GemmMicrokernelTester()
1948 .mr(4)
1949 .nr(16)
1950 .kr(1)
1951 .sr(1)
1952 .m(m)
1953 .n(n)
1954 .k(k)
1955 .cm_stride(19)
1956 .iterations(1)
1957 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1958 }
1959 }
1960 }
1961 }
1962
1963 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, a_offset) {
1964 TEST_REQUIRES_ARM_NEON_V8;
1965 for (size_t k = 1; k <= 40; k += 9) {
1966 GemmMicrokernelTester()
1967 .mr(4)
1968 .nr(16)
1969 .kr(1)
1970 .sr(1)
1971 .m(4)
1972 .n(16)
1973 .k(k)
1974 .ks(3)
1975 .a_offset(163)
1976 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1977 }
1978 }
1979
1980 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, zero) {
1981 TEST_REQUIRES_ARM_NEON_V8;
1982 for (uint32_t mz = 0; mz < 4; mz++) {
1983 for (size_t k = 1; k <= 40; k += 9) {
1984 GemmMicrokernelTester()
1985 .mr(4)
1986 .nr(16)
1987 .kr(1)
1988 .sr(1)
1989 .m(4)
1990 .n(16)
1991 .k(k)
1992 .ks(3)
1993 .a_offset(163)
1994 .zero_index(mz)
1995 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
1996 }
1997 }
1998 }
1999
2000 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, qmin) {
2001 TEST_REQUIRES_ARM_NEON_V8;
2002 GemmMicrokernelTester()
2003 .mr(4)
2004 .nr(16)
2005 .kr(1)
2006 .sr(1)
2007 .m(4)
2008 .n(16)
2009 .k(8)
2010 .qmin(128)
2011 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2012 }
2013
2014 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, qmax) {
2015 TEST_REQUIRES_ARM_NEON_V8;
2016 GemmMicrokernelTester()
2017 .mr(4)
2018 .nr(16)
2019 .kr(1)
2020 .sr(1)
2021 .m(4)
2022 .n(16)
2023 .k(8)
2024 .qmax(128)
2025 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2026 }
2027
2028 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, strided_cm) {
2029 TEST_REQUIRES_ARM_NEON_V8;
2030 GemmMicrokernelTester()
2031 .mr(4)
2032 .nr(16)
2033 .kr(1)
2034 .sr(1)
2035 .m(4)
2036 .n(16)
2037 .k(8)
2038 .cm_stride(19)
2039 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2040 }
2041
2042 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, no_a_zero_point) {
2043 TEST_REQUIRES_ARM_NEON_V8;
2044 for (size_t k = 1; k <= 40; k += 9) {
2045 GemmMicrokernelTester()
2046 .mr(4)
2047 .nr(16)
2048 .kr(1)
2049 .sr(1)
2050 .m(4)
2051 .n(16)
2052 .k(k)
2053 .a_zero_point(0)
2054 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2055 }
2056 }
2057
2058 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, no_b_zero_point) {
2059 TEST_REQUIRES_ARM_NEON_V8;
2060 for (size_t k = 1; k <= 40; k += 9) {
2061 GemmMicrokernelTester()
2062 .mr(4)
2063 .nr(16)
2064 .kr(1)
2065 .sr(1)
2066 .m(4)
2067 .n(16)
2068 .k(k)
2069 .b_zero_point(0)
2070 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2071 }
2072 }
2073
2074 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, no_zero_point) {
2075 TEST_REQUIRES_ARM_NEON_V8;
2076 for (size_t k = 1; k <= 40; k += 9) {
2077 GemmMicrokernelTester()
2078 .mr(4)
2079 .nr(16)
2080 .kr(1)
2081 .sr(1)
2082 .m(4)
2083 .n(16)
2084 .k(k)
2085 .a_zero_point(0)
2086 .b_zero_point(0)
2087 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2088 }
2089 }
2090#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2091
2092
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002093#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2094 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8) {
2095 TEST_REQUIRES_X86_SSE2;
2096 GemmMicrokernelTester()
2097 .mr(1)
2098 .nr(4)
2099 .kr(2)
2100 .sr(1)
2101 .m(1)
2102 .n(4)
2103 .k(8)
2104 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2105 }
2106
2107 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, strided_cn) {
2108 TEST_REQUIRES_X86_SSE2;
2109 GemmMicrokernelTester()
2110 .mr(1)
2111 .nr(4)
2112 .kr(2)
2113 .sr(1)
2114 .m(1)
2115 .n(4)
2116 .k(8)
2117 .cn_stride(7)
2118 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2119 }
2120
2121 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8_subtile) {
2122 TEST_REQUIRES_X86_SSE2;
2123 for (uint32_t m = 1; m <= 1; m++) {
2124 for (uint32_t n = 1; n <= 4; n++) {
2125 GemmMicrokernelTester()
2126 .mr(1)
2127 .nr(4)
2128 .kr(2)
2129 .sr(1)
2130 .m(m)
2131 .n(n)
2132 .k(8)
2133 .iterations(1)
2134 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2135 }
2136 }
2137 }
2138
2139 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8_subtile_m) {
2140 TEST_REQUIRES_X86_SSE2;
2141 for (uint32_t m = 1; m <= 1; m++) {
2142 GemmMicrokernelTester()
2143 .mr(1)
2144 .nr(4)
2145 .kr(2)
2146 .sr(1)
2147 .m(m)
2148 .n(4)
2149 .k(8)
2150 .iterations(1)
2151 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2152 }
2153 }
2154
2155 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8_subtile_n) {
2156 TEST_REQUIRES_X86_SSE2;
2157 for (uint32_t n = 1; n <= 4; n++) {
2158 GemmMicrokernelTester()
2159 .mr(1)
2160 .nr(4)
2161 .kr(2)
2162 .sr(1)
2163 .m(1)
2164 .n(n)
2165 .k(8)
2166 .iterations(1)
2167 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2168 }
2169 }
2170
2171 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_lt_8) {
2172 TEST_REQUIRES_X86_SSE2;
2173 for (size_t k = 1; k < 8; k++) {
2174 GemmMicrokernelTester()
2175 .mr(1)
2176 .nr(4)
2177 .kr(2)
2178 .sr(1)
2179 .m(1)
2180 .n(4)
2181 .k(k)
2182 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2183 }
2184 }
2185
2186 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_lt_8_subtile) {
2187 TEST_REQUIRES_X86_SSE2;
2188 for (size_t k = 1; k < 8; k++) {
2189 for (uint32_t m = 1; m <= 1; m++) {
2190 for (uint32_t n = 1; n <= 4; n++) {
2191 GemmMicrokernelTester()
2192 .mr(1)
2193 .nr(4)
2194 .kr(2)
2195 .sr(1)
2196 .m(m)
2197 .n(n)
2198 .k(k)
2199 .iterations(1)
2200 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2201 }
2202 }
2203 }
2204 }
2205
2206 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_gt_8) {
2207 TEST_REQUIRES_X86_SSE2;
2208 for (size_t k = 9; k < 16; k++) {
2209 GemmMicrokernelTester()
2210 .mr(1)
2211 .nr(4)
2212 .kr(2)
2213 .sr(1)
2214 .m(1)
2215 .n(4)
2216 .k(k)
2217 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2218 }
2219 }
2220
2221 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_gt_8_subtile) {
2222 TEST_REQUIRES_X86_SSE2;
2223 for (size_t k = 9; k < 16; k++) {
2224 for (uint32_t m = 1; m <= 1; m++) {
2225 for (uint32_t n = 1; n <= 4; n++) {
2226 GemmMicrokernelTester()
2227 .mr(1)
2228 .nr(4)
2229 .kr(2)
2230 .sr(1)
2231 .m(m)
2232 .n(n)
2233 .k(k)
2234 .iterations(1)
2235 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2236 }
2237 }
2238 }
2239 }
2240
2241 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_div_8) {
2242 TEST_REQUIRES_X86_SSE2;
2243 for (size_t k = 16; k <= 80; k += 8) {
2244 GemmMicrokernelTester()
2245 .mr(1)
2246 .nr(4)
2247 .kr(2)
2248 .sr(1)
2249 .m(1)
2250 .n(4)
2251 .k(k)
2252 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2253 }
2254 }
2255
2256 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_div_8_subtile) {
2257 TEST_REQUIRES_X86_SSE2;
2258 for (size_t k = 16; k <= 80; k += 8) {
2259 for (uint32_t m = 1; m <= 1; m++) {
2260 for (uint32_t n = 1; n <= 4; n++) {
2261 GemmMicrokernelTester()
2262 .mr(1)
2263 .nr(4)
2264 .kr(2)
2265 .sr(1)
2266 .m(m)
2267 .n(n)
2268 .k(k)
2269 .iterations(1)
2270 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2271 }
2272 }
2273 }
2274 }
2275
2276 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4) {
2277 TEST_REQUIRES_X86_SSE2;
2278 for (uint32_t n = 5; n < 8; n++) {
2279 for (size_t k = 1; k <= 40; k += 9) {
2280 GemmMicrokernelTester()
2281 .mr(1)
2282 .nr(4)
2283 .kr(2)
2284 .sr(1)
2285 .m(1)
2286 .n(4)
2287 .k(k)
2288 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2289 }
2290 }
2291 }
2292
2293 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4_strided_cn) {
2294 TEST_REQUIRES_X86_SSE2;
2295 for (uint32_t n = 5; n < 8; n++) {
2296 for (size_t k = 1; k <= 40; k += 9) {
2297 GemmMicrokernelTester()
2298 .mr(1)
2299 .nr(4)
2300 .kr(2)
2301 .sr(1)
2302 .m(1)
2303 .n(4)
2304 .k(k)
2305 .cn_stride(7)
2306 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2307 }
2308 }
2309 }
2310
2311 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4_subtile) {
2312 TEST_REQUIRES_X86_SSE2;
2313 for (uint32_t n = 5; n < 8; n++) {
2314 for (size_t k = 1; k <= 40; k += 9) {
2315 for (uint32_t m = 1; m <= 1; m++) {
2316 GemmMicrokernelTester()
2317 .mr(1)
2318 .nr(4)
2319 .kr(2)
2320 .sr(1)
2321 .m(m)
2322 .n(n)
2323 .k(k)
2324 .iterations(1)
2325 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2326 }
2327 }
2328 }
2329 }
2330
2331 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4) {
2332 TEST_REQUIRES_X86_SSE2;
2333 for (uint32_t n = 8; n <= 12; n += 4) {
2334 for (size_t k = 1; k <= 40; k += 9) {
2335 GemmMicrokernelTester()
2336 .mr(1)
2337 .nr(4)
2338 .kr(2)
2339 .sr(1)
2340 .m(1)
2341 .n(4)
2342 .k(k)
2343 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2344 }
2345 }
2346 }
2347
2348 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4_strided_cn) {
2349 TEST_REQUIRES_X86_SSE2;
2350 for (uint32_t n = 8; n <= 12; n += 4) {
2351 for (size_t k = 1; k <= 40; k += 9) {
2352 GemmMicrokernelTester()
2353 .mr(1)
2354 .nr(4)
2355 .kr(2)
2356 .sr(1)
2357 .m(1)
2358 .n(n)
2359 .k(k)
2360 .cn_stride(7)
2361 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2362 }
2363 }
2364 }
2365
2366 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4_subtile) {
2367 TEST_REQUIRES_X86_SSE2;
2368 for (uint32_t n = 8; n <= 12; n += 4) {
2369 for (size_t k = 1; k <= 40; k += 9) {
2370 for (uint32_t m = 1; m <= 1; m++) {
2371 GemmMicrokernelTester()
2372 .mr(1)
2373 .nr(4)
2374 .kr(2)
2375 .sr(1)
2376 .m(m)
2377 .n(n)
2378 .k(k)
2379 .iterations(1)
2380 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2381 }
2382 }
2383 }
2384 }
2385
2386 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, small_kernel) {
2387 TEST_REQUIRES_X86_SSE2;
2388 for (size_t k = 1; k <= 40; k += 9) {
2389 GemmMicrokernelTester()
2390 .mr(1)
2391 .nr(4)
2392 .kr(2)
2393 .sr(1)
2394 .m(1)
2395 .n(4)
2396 .k(k)
2397 .ks(3)
2398 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2399 }
2400 }
2401
2402 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, small_kernel_subtile) {
2403 TEST_REQUIRES_X86_SSE2;
2404 for (size_t k = 1; k <= 40; k += 9) {
2405 for (uint32_t m = 1; m <= 1; m++) {
2406 for (uint32_t n = 1; n <= 4; n++) {
2407 GemmMicrokernelTester()
2408 .mr(1)
2409 .nr(4)
2410 .kr(2)
2411 .sr(1)
2412 .m(m)
2413 .n(n)
2414 .k(k)
2415 .ks(3)
2416 .iterations(1)
2417 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2418 }
2419 }
2420 }
2421 }
2422
2423 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4_small_kernel) {
2424 TEST_REQUIRES_X86_SSE2;
2425 for (uint32_t n = 5; n < 8; n++) {
2426 for (size_t k = 1; k <= 40; k += 9) {
2427 GemmMicrokernelTester()
2428 .mr(1)
2429 .nr(4)
2430 .kr(2)
2431 .sr(1)
2432 .m(1)
2433 .n(4)
2434 .k(k)
2435 .ks(3)
2436 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2437 }
2438 }
2439 }
2440
2441 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4_small_kernel) {
2442 TEST_REQUIRES_X86_SSE2;
2443 for (uint32_t n = 8; n <= 12; n += 4) {
2444 for (size_t k = 1; k <= 40; k += 9) {
2445 GemmMicrokernelTester()
2446 .mr(1)
2447 .nr(4)
2448 .kr(2)
2449 .sr(1)
2450 .m(1)
2451 .n(4)
2452 .k(k)
2453 .ks(3)
2454 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2455 }
2456 }
2457 }
2458
2459 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, strided_cm_subtile) {
2460 TEST_REQUIRES_X86_SSE2;
2461 for (size_t k = 1; k <= 40; k += 9) {
2462 for (uint32_t m = 1; m <= 1; m++) {
2463 for (uint32_t n = 1; n <= 4; n++) {
2464 GemmMicrokernelTester()
2465 .mr(1)
2466 .nr(4)
2467 .kr(2)
2468 .sr(1)
2469 .m(m)
2470 .n(n)
2471 .k(k)
2472 .cm_stride(7)
2473 .iterations(1)
2474 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2475 }
2476 }
2477 }
2478 }
2479
2480 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, a_offset) {
2481 TEST_REQUIRES_X86_SSE2;
2482 for (size_t k = 1; k <= 40; k += 9) {
2483 GemmMicrokernelTester()
2484 .mr(1)
2485 .nr(4)
2486 .kr(2)
2487 .sr(1)
2488 .m(1)
2489 .n(4)
2490 .k(k)
2491 .ks(3)
2492 .a_offset(43)
2493 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2494 }
2495 }
2496
2497 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, zero) {
2498 TEST_REQUIRES_X86_SSE2;
2499 for (uint32_t mz = 0; mz < 1; mz++) {
2500 for (size_t k = 1; k <= 40; k += 9) {
2501 GemmMicrokernelTester()
2502 .mr(1)
2503 .nr(4)
2504 .kr(2)
2505 .sr(1)
2506 .m(1)
2507 .n(4)
2508 .k(k)
2509 .ks(3)
2510 .a_offset(43)
2511 .zero_index(mz)
2512 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2513 }
2514 }
2515 }
2516
2517 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, qmin) {
2518 TEST_REQUIRES_X86_SSE2;
2519 GemmMicrokernelTester()
2520 .mr(1)
2521 .nr(4)
2522 .kr(2)
2523 .sr(1)
2524 .m(1)
2525 .n(4)
2526 .k(8)
2527 .qmin(128)
2528 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2529 }
2530
2531 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, qmax) {
2532 TEST_REQUIRES_X86_SSE2;
2533 GemmMicrokernelTester()
2534 .mr(1)
2535 .nr(4)
2536 .kr(2)
2537 .sr(1)
2538 .m(1)
2539 .n(4)
2540 .k(8)
2541 .qmax(128)
2542 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2543 }
2544
2545 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, strided_cm) {
2546 TEST_REQUIRES_X86_SSE2;
2547 GemmMicrokernelTester()
2548 .mr(1)
2549 .nr(4)
2550 .kr(2)
2551 .sr(1)
2552 .m(1)
2553 .n(4)
2554 .k(8)
2555 .cm_stride(7)
2556 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2557 }
2558
2559 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, no_a_zero_point) {
2560 TEST_REQUIRES_X86_SSE2;
2561 for (size_t k = 1; k <= 40; k += 9) {
2562 GemmMicrokernelTester()
2563 .mr(1)
2564 .nr(4)
2565 .kr(2)
2566 .sr(1)
2567 .m(1)
2568 .n(4)
2569 .k(k)
2570 .a_zero_point(0)
2571 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2572 }
2573 }
2574
2575 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, no_b_zero_point) {
2576 TEST_REQUIRES_X86_SSE2;
2577 for (size_t k = 1; k <= 40; k += 9) {
2578 GemmMicrokernelTester()
2579 .mr(1)
2580 .nr(4)
2581 .kr(2)
2582 .sr(1)
2583 .m(1)
2584 .n(4)
2585 .k(k)
2586 .b_zero_point(0)
2587 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2588 }
2589 }
2590
2591 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, no_zero_point) {
2592 TEST_REQUIRES_X86_SSE2;
2593 for (size_t k = 1; k <= 40; k += 9) {
2594 GemmMicrokernelTester()
2595 .mr(1)
2596 .nr(4)
2597 .kr(2)
2598 .sr(1)
2599 .m(1)
2600 .n(4)
2601 .k(k)
2602 .a_zero_point(0)
2603 .b_zero_point(0)
2604 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2605 }
2606 }
2607#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2608
2609
2610#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2611 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8) {
2612 TEST_REQUIRES_X86_SSE2;
2613 GemmMicrokernelTester()
2614 .mr(2)
2615 .nr(4)
2616 .kr(2)
2617 .sr(1)
2618 .m(2)
2619 .n(4)
2620 .k(8)
2621 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2622 }
2623
2624 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, strided_cn) {
2625 TEST_REQUIRES_X86_SSE2;
2626 GemmMicrokernelTester()
2627 .mr(2)
2628 .nr(4)
2629 .kr(2)
2630 .sr(1)
2631 .m(2)
2632 .n(4)
2633 .k(8)
2634 .cn_stride(7)
2635 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2636 }
2637
2638 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8_subtile) {
2639 TEST_REQUIRES_X86_SSE2;
2640 for (uint32_t m = 1; m <= 2; m++) {
2641 for (uint32_t n = 1; n <= 4; n++) {
2642 GemmMicrokernelTester()
2643 .mr(2)
2644 .nr(4)
2645 .kr(2)
2646 .sr(1)
2647 .m(m)
2648 .n(n)
2649 .k(8)
2650 .iterations(1)
2651 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2652 }
2653 }
2654 }
2655
2656 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8_subtile_m) {
2657 TEST_REQUIRES_X86_SSE2;
2658 for (uint32_t m = 1; m <= 2; m++) {
2659 GemmMicrokernelTester()
2660 .mr(2)
2661 .nr(4)
2662 .kr(2)
2663 .sr(1)
2664 .m(m)
2665 .n(4)
2666 .k(8)
2667 .iterations(1)
2668 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2669 }
2670 }
2671
2672 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8_subtile_n) {
2673 TEST_REQUIRES_X86_SSE2;
2674 for (uint32_t n = 1; n <= 4; n++) {
2675 GemmMicrokernelTester()
2676 .mr(2)
2677 .nr(4)
2678 .kr(2)
2679 .sr(1)
2680 .m(2)
2681 .n(n)
2682 .k(8)
2683 .iterations(1)
2684 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2685 }
2686 }
2687
2688 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_lt_8) {
2689 TEST_REQUIRES_X86_SSE2;
2690 for (size_t k = 1; k < 8; k++) {
2691 GemmMicrokernelTester()
2692 .mr(2)
2693 .nr(4)
2694 .kr(2)
2695 .sr(1)
2696 .m(2)
2697 .n(4)
2698 .k(k)
2699 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2700 }
2701 }
2702
2703 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_lt_8_subtile) {
2704 TEST_REQUIRES_X86_SSE2;
2705 for (size_t k = 1; k < 8; k++) {
2706 for (uint32_t m = 1; m <= 2; m++) {
2707 for (uint32_t n = 1; n <= 4; n++) {
2708 GemmMicrokernelTester()
2709 .mr(2)
2710 .nr(4)
2711 .kr(2)
2712 .sr(1)
2713 .m(m)
2714 .n(n)
2715 .k(k)
2716 .iterations(1)
2717 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2718 }
2719 }
2720 }
2721 }
2722
2723 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_gt_8) {
2724 TEST_REQUIRES_X86_SSE2;
2725 for (size_t k = 9; k < 16; k++) {
2726 GemmMicrokernelTester()
2727 .mr(2)
2728 .nr(4)
2729 .kr(2)
2730 .sr(1)
2731 .m(2)
2732 .n(4)
2733 .k(k)
2734 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2735 }
2736 }
2737
2738 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_gt_8_subtile) {
2739 TEST_REQUIRES_X86_SSE2;
2740 for (size_t k = 9; k < 16; k++) {
2741 for (uint32_t m = 1; m <= 2; m++) {
2742 for (uint32_t n = 1; n <= 4; n++) {
2743 GemmMicrokernelTester()
2744 .mr(2)
2745 .nr(4)
2746 .kr(2)
2747 .sr(1)
2748 .m(m)
2749 .n(n)
2750 .k(k)
2751 .iterations(1)
2752 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2753 }
2754 }
2755 }
2756 }
2757
2758 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_div_8) {
2759 TEST_REQUIRES_X86_SSE2;
2760 for (size_t k = 16; k <= 80; k += 8) {
2761 GemmMicrokernelTester()
2762 .mr(2)
2763 .nr(4)
2764 .kr(2)
2765 .sr(1)
2766 .m(2)
2767 .n(4)
2768 .k(k)
2769 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2770 }
2771 }
2772
2773 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_div_8_subtile) {
2774 TEST_REQUIRES_X86_SSE2;
2775 for (size_t k = 16; k <= 80; k += 8) {
2776 for (uint32_t m = 1; m <= 2; m++) {
2777 for (uint32_t n = 1; n <= 4; n++) {
2778 GemmMicrokernelTester()
2779 .mr(2)
2780 .nr(4)
2781 .kr(2)
2782 .sr(1)
2783 .m(m)
2784 .n(n)
2785 .k(k)
2786 .iterations(1)
2787 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2788 }
2789 }
2790 }
2791 }
2792
2793 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4) {
2794 TEST_REQUIRES_X86_SSE2;
2795 for (uint32_t n = 5; n < 8; n++) {
2796 for (size_t k = 1; k <= 40; k += 9) {
2797 GemmMicrokernelTester()
2798 .mr(2)
2799 .nr(4)
2800 .kr(2)
2801 .sr(1)
2802 .m(2)
2803 .n(4)
2804 .k(k)
2805 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2806 }
2807 }
2808 }
2809
2810 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4_strided_cn) {
2811 TEST_REQUIRES_X86_SSE2;
2812 for (uint32_t n = 5; n < 8; n++) {
2813 for (size_t k = 1; k <= 40; k += 9) {
2814 GemmMicrokernelTester()
2815 .mr(2)
2816 .nr(4)
2817 .kr(2)
2818 .sr(1)
2819 .m(2)
2820 .n(4)
2821 .k(k)
2822 .cn_stride(7)
2823 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2824 }
2825 }
2826 }
2827
2828 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4_subtile) {
2829 TEST_REQUIRES_X86_SSE2;
2830 for (uint32_t n = 5; n < 8; n++) {
2831 for (size_t k = 1; k <= 40; k += 9) {
2832 for (uint32_t m = 1; m <= 2; m++) {
2833 GemmMicrokernelTester()
2834 .mr(2)
2835 .nr(4)
2836 .kr(2)
2837 .sr(1)
2838 .m(m)
2839 .n(n)
2840 .k(k)
2841 .iterations(1)
2842 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2843 }
2844 }
2845 }
2846 }
2847
2848 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4) {
2849 TEST_REQUIRES_X86_SSE2;
2850 for (uint32_t n = 8; n <= 12; n += 4) {
2851 for (size_t k = 1; k <= 40; k += 9) {
2852 GemmMicrokernelTester()
2853 .mr(2)
2854 .nr(4)
2855 .kr(2)
2856 .sr(1)
2857 .m(2)
2858 .n(4)
2859 .k(k)
2860 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2861 }
2862 }
2863 }
2864
2865 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4_strided_cn) {
2866 TEST_REQUIRES_X86_SSE2;
2867 for (uint32_t n = 8; n <= 12; n += 4) {
2868 for (size_t k = 1; k <= 40; k += 9) {
2869 GemmMicrokernelTester()
2870 .mr(2)
2871 .nr(4)
2872 .kr(2)
2873 .sr(1)
2874 .m(2)
2875 .n(n)
2876 .k(k)
2877 .cn_stride(7)
2878 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2879 }
2880 }
2881 }
2882
2883 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4_subtile) {
2884 TEST_REQUIRES_X86_SSE2;
2885 for (uint32_t n = 8; n <= 12; n += 4) {
2886 for (size_t k = 1; k <= 40; k += 9) {
2887 for (uint32_t m = 1; m <= 2; m++) {
2888 GemmMicrokernelTester()
2889 .mr(2)
2890 .nr(4)
2891 .kr(2)
2892 .sr(1)
2893 .m(m)
2894 .n(n)
2895 .k(k)
2896 .iterations(1)
2897 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2898 }
2899 }
2900 }
2901 }
2902
2903 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, small_kernel) {
2904 TEST_REQUIRES_X86_SSE2;
2905 for (size_t k = 1; k <= 40; k += 9) {
2906 GemmMicrokernelTester()
2907 .mr(2)
2908 .nr(4)
2909 .kr(2)
2910 .sr(1)
2911 .m(2)
2912 .n(4)
2913 .k(k)
2914 .ks(3)
2915 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2916 }
2917 }
2918
2919 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, small_kernel_subtile) {
2920 TEST_REQUIRES_X86_SSE2;
2921 for (size_t k = 1; k <= 40; k += 9) {
2922 for (uint32_t m = 1; m <= 2; m++) {
2923 for (uint32_t n = 1; n <= 4; n++) {
2924 GemmMicrokernelTester()
2925 .mr(2)
2926 .nr(4)
2927 .kr(2)
2928 .sr(1)
2929 .m(m)
2930 .n(n)
2931 .k(k)
2932 .ks(3)
2933 .iterations(1)
2934 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2935 }
2936 }
2937 }
2938 }
2939
2940 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4_small_kernel) {
2941 TEST_REQUIRES_X86_SSE2;
2942 for (uint32_t n = 5; n < 8; n++) {
2943 for (size_t k = 1; k <= 40; k += 9) {
2944 GemmMicrokernelTester()
2945 .mr(2)
2946 .nr(4)
2947 .kr(2)
2948 .sr(1)
2949 .m(2)
2950 .n(4)
2951 .k(k)
2952 .ks(3)
2953 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2954 }
2955 }
2956 }
2957
2958 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4_small_kernel) {
2959 TEST_REQUIRES_X86_SSE2;
2960 for (uint32_t n = 8; n <= 12; n += 4) {
2961 for (size_t k = 1; k <= 40; k += 9) {
2962 GemmMicrokernelTester()
2963 .mr(2)
2964 .nr(4)
2965 .kr(2)
2966 .sr(1)
2967 .m(2)
2968 .n(4)
2969 .k(k)
2970 .ks(3)
2971 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2972 }
2973 }
2974 }
2975
2976 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, strided_cm_subtile) {
2977 TEST_REQUIRES_X86_SSE2;
2978 for (size_t k = 1; k <= 40; k += 9) {
2979 for (uint32_t m = 1; m <= 2; m++) {
2980 for (uint32_t n = 1; n <= 4; n++) {
2981 GemmMicrokernelTester()
2982 .mr(2)
2983 .nr(4)
2984 .kr(2)
2985 .sr(1)
2986 .m(m)
2987 .n(n)
2988 .k(k)
2989 .cm_stride(7)
2990 .iterations(1)
2991 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
2992 }
2993 }
2994 }
2995 }
2996
2997 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, a_offset) {
2998 TEST_REQUIRES_X86_SSE2;
2999 for (size_t k = 1; k <= 40; k += 9) {
3000 GemmMicrokernelTester()
3001 .mr(2)
3002 .nr(4)
3003 .kr(2)
3004 .sr(1)
3005 .m(2)
3006 .n(4)
3007 .k(k)
3008 .ks(3)
3009 .a_offset(83)
3010 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3011 }
3012 }
3013
3014 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, zero) {
3015 TEST_REQUIRES_X86_SSE2;
3016 for (uint32_t mz = 0; mz < 2; mz++) {
3017 for (size_t k = 1; k <= 40; k += 9) {
3018 GemmMicrokernelTester()
3019 .mr(2)
3020 .nr(4)
3021 .kr(2)
3022 .sr(1)
3023 .m(2)
3024 .n(4)
3025 .k(k)
3026 .ks(3)
3027 .a_offset(83)
3028 .zero_index(mz)
3029 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3030 }
3031 }
3032 }
3033
3034 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, qmin) {
3035 TEST_REQUIRES_X86_SSE2;
3036 GemmMicrokernelTester()
3037 .mr(2)
3038 .nr(4)
3039 .kr(2)
3040 .sr(1)
3041 .m(2)
3042 .n(4)
3043 .k(8)
3044 .qmin(128)
3045 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3046 }
3047
3048 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, qmax) {
3049 TEST_REQUIRES_X86_SSE2;
3050 GemmMicrokernelTester()
3051 .mr(2)
3052 .nr(4)
3053 .kr(2)
3054 .sr(1)
3055 .m(2)
3056 .n(4)
3057 .k(8)
3058 .qmax(128)
3059 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3060 }
3061
3062 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, strided_cm) {
3063 TEST_REQUIRES_X86_SSE2;
3064 GemmMicrokernelTester()
3065 .mr(2)
3066 .nr(4)
3067 .kr(2)
3068 .sr(1)
3069 .m(2)
3070 .n(4)
3071 .k(8)
3072 .cm_stride(7)
3073 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3074 }
3075
3076 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, no_a_zero_point) {
3077 TEST_REQUIRES_X86_SSE2;
3078 for (size_t k = 1; k <= 40; k += 9) {
3079 GemmMicrokernelTester()
3080 .mr(2)
3081 .nr(4)
3082 .kr(2)
3083 .sr(1)
3084 .m(2)
3085 .n(4)
3086 .k(k)
3087 .a_zero_point(0)
3088 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3089 }
3090 }
3091
3092 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, no_b_zero_point) {
3093 TEST_REQUIRES_X86_SSE2;
3094 for (size_t k = 1; k <= 40; k += 9) {
3095 GemmMicrokernelTester()
3096 .mr(2)
3097 .nr(4)
3098 .kr(2)
3099 .sr(1)
3100 .m(2)
3101 .n(4)
3102 .k(k)
3103 .b_zero_point(0)
3104 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3105 }
3106 }
3107
3108 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, no_zero_point) {
3109 TEST_REQUIRES_X86_SSE2;
3110 for (size_t k = 1; k <= 40; k += 9) {
3111 GemmMicrokernelTester()
3112 .mr(2)
3113 .nr(4)
3114 .kr(2)
3115 .sr(1)
3116 .m(2)
3117 .n(4)
3118 .k(k)
3119 .a_zero_point(0)
3120 .b_zero_point(0)
3121 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3122 }
3123 }
3124#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3125
3126
3127#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3128 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8) {
3129 TEST_REQUIRES_X86_SSE2;
3130 GemmMicrokernelTester()
3131 .mr(3)
3132 .nr(4)
3133 .kr(2)
3134 .sr(1)
3135 .m(3)
3136 .n(4)
3137 .k(8)
3138 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3139 }
3140
3141 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cn) {
3142 TEST_REQUIRES_X86_SSE2;
3143 GemmMicrokernelTester()
3144 .mr(3)
3145 .nr(4)
3146 .kr(2)
3147 .sr(1)
3148 .m(3)
3149 .n(4)
3150 .k(8)
3151 .cn_stride(7)
3152 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3153 }
3154
3155 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile) {
3156 TEST_REQUIRES_X86_SSE2;
3157 for (uint32_t m = 1; m <= 3; m++) {
3158 for (uint32_t n = 1; n <= 4; n++) {
3159 GemmMicrokernelTester()
3160 .mr(3)
3161 .nr(4)
3162 .kr(2)
3163 .sr(1)
3164 .m(m)
3165 .n(n)
3166 .k(8)
3167 .iterations(1)
3168 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3169 }
3170 }
3171 }
3172
3173 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile_m) {
3174 TEST_REQUIRES_X86_SSE2;
3175 for (uint32_t m = 1; m <= 3; m++) {
3176 GemmMicrokernelTester()
3177 .mr(3)
3178 .nr(4)
3179 .kr(2)
3180 .sr(1)
3181 .m(m)
3182 .n(4)
3183 .k(8)
3184 .iterations(1)
3185 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3186 }
3187 }
3188
3189 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile_n) {
3190 TEST_REQUIRES_X86_SSE2;
3191 for (uint32_t n = 1; n <= 4; n++) {
3192 GemmMicrokernelTester()
3193 .mr(3)
3194 .nr(4)
3195 .kr(2)
3196 .sr(1)
3197 .m(3)
3198 .n(n)
3199 .k(8)
3200 .iterations(1)
3201 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3202 }
3203 }
3204
3205 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_lt_8) {
3206 TEST_REQUIRES_X86_SSE2;
3207 for (size_t k = 1; k < 8; k++) {
3208 GemmMicrokernelTester()
3209 .mr(3)
3210 .nr(4)
3211 .kr(2)
3212 .sr(1)
3213 .m(3)
3214 .n(4)
3215 .k(k)
3216 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3217 }
3218 }
3219
3220 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_lt_8_subtile) {
3221 TEST_REQUIRES_X86_SSE2;
3222 for (size_t k = 1; k < 8; k++) {
3223 for (uint32_t m = 1; m <= 3; m++) {
3224 for (uint32_t n = 1; n <= 4; n++) {
3225 GemmMicrokernelTester()
3226 .mr(3)
3227 .nr(4)
3228 .kr(2)
3229 .sr(1)
3230 .m(m)
3231 .n(n)
3232 .k(k)
3233 .iterations(1)
3234 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3235 }
3236 }
3237 }
3238 }
3239
3240 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_gt_8) {
3241 TEST_REQUIRES_X86_SSE2;
3242 for (size_t k = 9; k < 16; k++) {
3243 GemmMicrokernelTester()
3244 .mr(3)
3245 .nr(4)
3246 .kr(2)
3247 .sr(1)
3248 .m(3)
3249 .n(4)
3250 .k(k)
3251 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3252 }
3253 }
3254
3255 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_gt_8_subtile) {
3256 TEST_REQUIRES_X86_SSE2;
3257 for (size_t k = 9; k < 16; k++) {
3258 for (uint32_t m = 1; m <= 3; m++) {
3259 for (uint32_t n = 1; n <= 4; n++) {
3260 GemmMicrokernelTester()
3261 .mr(3)
3262 .nr(4)
3263 .kr(2)
3264 .sr(1)
3265 .m(m)
3266 .n(n)
3267 .k(k)
3268 .iterations(1)
3269 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3270 }
3271 }
3272 }
3273 }
3274
3275 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_div_8) {
3276 TEST_REQUIRES_X86_SSE2;
3277 for (size_t k = 16; k <= 80; k += 8) {
3278 GemmMicrokernelTester()
3279 .mr(3)
3280 .nr(4)
3281 .kr(2)
3282 .sr(1)
3283 .m(3)
3284 .n(4)
3285 .k(k)
3286 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3287 }
3288 }
3289
3290 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_div_8_subtile) {
3291 TEST_REQUIRES_X86_SSE2;
3292 for (size_t k = 16; k <= 80; k += 8) {
3293 for (uint32_t m = 1; m <= 3; m++) {
3294 for (uint32_t n = 1; n <= 4; n++) {
3295 GemmMicrokernelTester()
3296 .mr(3)
3297 .nr(4)
3298 .kr(2)
3299 .sr(1)
3300 .m(m)
3301 .n(n)
3302 .k(k)
3303 .iterations(1)
3304 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3305 }
3306 }
3307 }
3308 }
3309
3310 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4) {
3311 TEST_REQUIRES_X86_SSE2;
3312 for (uint32_t n = 5; n < 8; n++) {
3313 for (size_t k = 1; k <= 40; k += 9) {
3314 GemmMicrokernelTester()
3315 .mr(3)
3316 .nr(4)
3317 .kr(2)
3318 .sr(1)
3319 .m(3)
3320 .n(4)
3321 .k(k)
3322 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3323 }
3324 }
3325 }
3326
3327 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_strided_cn) {
3328 TEST_REQUIRES_X86_SSE2;
3329 for (uint32_t n = 5; n < 8; n++) {
3330 for (size_t k = 1; k <= 40; k += 9) {
3331 GemmMicrokernelTester()
3332 .mr(3)
3333 .nr(4)
3334 .kr(2)
3335 .sr(1)
3336 .m(3)
3337 .n(4)
3338 .k(k)
3339 .cn_stride(7)
3340 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3341 }
3342 }
3343 }
3344
3345 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_subtile) {
3346 TEST_REQUIRES_X86_SSE2;
3347 for (uint32_t n = 5; n < 8; n++) {
3348 for (size_t k = 1; k <= 40; k += 9) {
3349 for (uint32_t m = 1; m <= 3; m++) {
3350 GemmMicrokernelTester()
3351 .mr(3)
3352 .nr(4)
3353 .kr(2)
3354 .sr(1)
3355 .m(m)
3356 .n(n)
3357 .k(k)
3358 .iterations(1)
3359 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3360 }
3361 }
3362 }
3363 }
3364
3365 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4) {
3366 TEST_REQUIRES_X86_SSE2;
3367 for (uint32_t n = 8; n <= 12; n += 4) {
3368 for (size_t k = 1; k <= 40; k += 9) {
3369 GemmMicrokernelTester()
3370 .mr(3)
3371 .nr(4)
3372 .kr(2)
3373 .sr(1)
3374 .m(3)
3375 .n(4)
3376 .k(k)
3377 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3378 }
3379 }
3380 }
3381
3382 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_strided_cn) {
3383 TEST_REQUIRES_X86_SSE2;
3384 for (uint32_t n = 8; n <= 12; n += 4) {
3385 for (size_t k = 1; k <= 40; k += 9) {
3386 GemmMicrokernelTester()
3387 .mr(3)
3388 .nr(4)
3389 .kr(2)
3390 .sr(1)
3391 .m(3)
3392 .n(n)
3393 .k(k)
3394 .cn_stride(7)
3395 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3396 }
3397 }
3398 }
3399
3400 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_subtile) {
3401 TEST_REQUIRES_X86_SSE2;
3402 for (uint32_t n = 8; n <= 12; n += 4) {
3403 for (size_t k = 1; k <= 40; k += 9) {
3404 for (uint32_t m = 1; m <= 3; m++) {
3405 GemmMicrokernelTester()
3406 .mr(3)
3407 .nr(4)
3408 .kr(2)
3409 .sr(1)
3410 .m(m)
3411 .n(n)
3412 .k(k)
3413 .iterations(1)
3414 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3415 }
3416 }
3417 }
3418 }
3419
3420 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, small_kernel) {
3421 TEST_REQUIRES_X86_SSE2;
3422 for (size_t k = 1; k <= 40; k += 9) {
3423 GemmMicrokernelTester()
3424 .mr(3)
3425 .nr(4)
3426 .kr(2)
3427 .sr(1)
3428 .m(3)
3429 .n(4)
3430 .k(k)
3431 .ks(3)
3432 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3433 }
3434 }
3435
3436 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, small_kernel_subtile) {
3437 TEST_REQUIRES_X86_SSE2;
3438 for (size_t k = 1; k <= 40; k += 9) {
3439 for (uint32_t m = 1; m <= 3; m++) {
3440 for (uint32_t n = 1; n <= 4; n++) {
3441 GemmMicrokernelTester()
3442 .mr(3)
3443 .nr(4)
3444 .kr(2)
3445 .sr(1)
3446 .m(m)
3447 .n(n)
3448 .k(k)
3449 .ks(3)
3450 .iterations(1)
3451 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3452 }
3453 }
3454 }
3455 }
3456
3457 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_small_kernel) {
3458 TEST_REQUIRES_X86_SSE2;
3459 for (uint32_t n = 5; n < 8; n++) {
3460 for (size_t k = 1; k <= 40; k += 9) {
3461 GemmMicrokernelTester()
3462 .mr(3)
3463 .nr(4)
3464 .kr(2)
3465 .sr(1)
3466 .m(3)
3467 .n(4)
3468 .k(k)
3469 .ks(3)
3470 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3471 }
3472 }
3473 }
3474
3475 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_small_kernel) {
3476 TEST_REQUIRES_X86_SSE2;
3477 for (uint32_t n = 8; n <= 12; n += 4) {
3478 for (size_t k = 1; k <= 40; k += 9) {
3479 GemmMicrokernelTester()
3480 .mr(3)
3481 .nr(4)
3482 .kr(2)
3483 .sr(1)
3484 .m(3)
3485 .n(4)
3486 .k(k)
3487 .ks(3)
3488 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3489 }
3490 }
3491 }
3492
3493 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cm_subtile) {
3494 TEST_REQUIRES_X86_SSE2;
3495 for (size_t k = 1; k <= 40; k += 9) {
3496 for (uint32_t m = 1; m <= 3; m++) {
3497 for (uint32_t n = 1; n <= 4; n++) {
3498 GemmMicrokernelTester()
3499 .mr(3)
3500 .nr(4)
3501 .kr(2)
3502 .sr(1)
3503 .m(m)
3504 .n(n)
3505 .k(k)
3506 .cm_stride(7)
3507 .iterations(1)
3508 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3509 }
3510 }
3511 }
3512 }
3513
3514 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, a_offset) {
3515 TEST_REQUIRES_X86_SSE2;
3516 for (size_t k = 1; k <= 40; k += 9) {
3517 GemmMicrokernelTester()
3518 .mr(3)
3519 .nr(4)
3520 .kr(2)
3521 .sr(1)
3522 .m(3)
3523 .n(4)
3524 .k(k)
3525 .ks(3)
3526 .a_offset(127)
3527 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3528 }
3529 }
3530
3531 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, zero) {
3532 TEST_REQUIRES_X86_SSE2;
3533 for (uint32_t mz = 0; mz < 3; mz++) {
3534 for (size_t k = 1; k <= 40; k += 9) {
3535 GemmMicrokernelTester()
3536 .mr(3)
3537 .nr(4)
3538 .kr(2)
3539 .sr(1)
3540 .m(3)
3541 .n(4)
3542 .k(k)
3543 .ks(3)
3544 .a_offset(127)
3545 .zero_index(mz)
3546 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3547 }
3548 }
3549 }
3550
3551 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, qmin) {
3552 TEST_REQUIRES_X86_SSE2;
3553 GemmMicrokernelTester()
3554 .mr(3)
3555 .nr(4)
3556 .kr(2)
3557 .sr(1)
3558 .m(3)
3559 .n(4)
3560 .k(8)
3561 .qmin(128)
3562 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3563 }
3564
3565 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, qmax) {
3566 TEST_REQUIRES_X86_SSE2;
3567 GemmMicrokernelTester()
3568 .mr(3)
3569 .nr(4)
3570 .kr(2)
3571 .sr(1)
3572 .m(3)
3573 .n(4)
3574 .k(8)
3575 .qmax(128)
3576 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3577 }
3578
3579 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cm) {
3580 TEST_REQUIRES_X86_SSE2;
3581 GemmMicrokernelTester()
3582 .mr(3)
3583 .nr(4)
3584 .kr(2)
3585 .sr(1)
3586 .m(3)
3587 .n(4)
3588 .k(8)
3589 .cm_stride(7)
3590 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3591 }
3592
3593 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, no_a_zero_point) {
3594 TEST_REQUIRES_X86_SSE2;
3595 for (size_t k = 1; k <= 40; k += 9) {
3596 GemmMicrokernelTester()
3597 .mr(3)
3598 .nr(4)
3599 .kr(2)
3600 .sr(1)
3601 .m(3)
3602 .n(4)
3603 .k(k)
3604 .a_zero_point(0)
3605 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3606 }
3607 }
3608
3609 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, no_b_zero_point) {
3610 TEST_REQUIRES_X86_SSE2;
3611 for (size_t k = 1; k <= 40; k += 9) {
3612 GemmMicrokernelTester()
3613 .mr(3)
3614 .nr(4)
3615 .kr(2)
3616 .sr(1)
3617 .m(3)
3618 .n(4)
3619 .k(k)
3620 .b_zero_point(0)
3621 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3622 }
3623 }
3624
3625 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, no_zero_point) {
3626 TEST_REQUIRES_X86_SSE2;
3627 for (size_t k = 1; k <= 40; k += 9) {
3628 GemmMicrokernelTester()
3629 .mr(3)
3630 .nr(4)
3631 .kr(2)
3632 .sr(1)
3633 .m(3)
3634 .n(4)
3635 .k(k)
3636 .a_zero_point(0)
3637 .b_zero_point(0)
3638 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3639 }
3640 }
3641#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3642
3643
3644#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3645 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8) {
3646 TEST_REQUIRES_X86_SSE2;
3647 GemmMicrokernelTester()
3648 .mr(4)
3649 .nr(4)
3650 .kr(2)
3651 .sr(1)
3652 .m(4)
3653 .n(4)
3654 .k(8)
3655 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3656 }
3657
3658 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, strided_cn) {
3659 TEST_REQUIRES_X86_SSE2;
3660 GemmMicrokernelTester()
3661 .mr(4)
3662 .nr(4)
3663 .kr(2)
3664 .sr(1)
3665 .m(4)
3666 .n(4)
3667 .k(8)
3668 .cn_stride(7)
3669 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3670 }
3671
3672 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8_subtile) {
3673 TEST_REQUIRES_X86_SSE2;
3674 for (uint32_t m = 1; m <= 4; m++) {
3675 for (uint32_t n = 1; n <= 4; n++) {
3676 GemmMicrokernelTester()
3677 .mr(4)
3678 .nr(4)
3679 .kr(2)
3680 .sr(1)
3681 .m(m)
3682 .n(n)
3683 .k(8)
3684 .iterations(1)
3685 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3686 }
3687 }
3688 }
3689
3690 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8_subtile_m) {
3691 TEST_REQUIRES_X86_SSE2;
3692 for (uint32_t m = 1; m <= 4; m++) {
3693 GemmMicrokernelTester()
3694 .mr(4)
3695 .nr(4)
3696 .kr(2)
3697 .sr(1)
3698 .m(m)
3699 .n(4)
3700 .k(8)
3701 .iterations(1)
3702 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3703 }
3704 }
3705
3706 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8_subtile_n) {
3707 TEST_REQUIRES_X86_SSE2;
3708 for (uint32_t n = 1; n <= 4; n++) {
3709 GemmMicrokernelTester()
3710 .mr(4)
3711 .nr(4)
3712 .kr(2)
3713 .sr(1)
3714 .m(4)
3715 .n(n)
3716 .k(8)
3717 .iterations(1)
3718 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3719 }
3720 }
3721
3722 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_lt_8) {
3723 TEST_REQUIRES_X86_SSE2;
3724 for (size_t k = 1; k < 8; k++) {
3725 GemmMicrokernelTester()
3726 .mr(4)
3727 .nr(4)
3728 .kr(2)
3729 .sr(1)
3730 .m(4)
3731 .n(4)
3732 .k(k)
3733 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3734 }
3735 }
3736
3737 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_lt_8_subtile) {
3738 TEST_REQUIRES_X86_SSE2;
3739 for (size_t k = 1; k < 8; k++) {
3740 for (uint32_t m = 1; m <= 4; m++) {
3741 for (uint32_t n = 1; n <= 4; n++) {
3742 GemmMicrokernelTester()
3743 .mr(4)
3744 .nr(4)
3745 .kr(2)
3746 .sr(1)
3747 .m(m)
3748 .n(n)
3749 .k(k)
3750 .iterations(1)
3751 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3752 }
3753 }
3754 }
3755 }
3756
3757 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_gt_8) {
3758 TEST_REQUIRES_X86_SSE2;
3759 for (size_t k = 9; k < 16; k++) {
3760 GemmMicrokernelTester()
3761 .mr(4)
3762 .nr(4)
3763 .kr(2)
3764 .sr(1)
3765 .m(4)
3766 .n(4)
3767 .k(k)
3768 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3769 }
3770 }
3771
3772 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_gt_8_subtile) {
3773 TEST_REQUIRES_X86_SSE2;
3774 for (size_t k = 9; k < 16; k++) {
3775 for (uint32_t m = 1; m <= 4; m++) {
3776 for (uint32_t n = 1; n <= 4; n++) {
3777 GemmMicrokernelTester()
3778 .mr(4)
3779 .nr(4)
3780 .kr(2)
3781 .sr(1)
3782 .m(m)
3783 .n(n)
3784 .k(k)
3785 .iterations(1)
3786 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3787 }
3788 }
3789 }
3790 }
3791
3792 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_div_8) {
3793 TEST_REQUIRES_X86_SSE2;
3794 for (size_t k = 16; k <= 80; k += 8) {
3795 GemmMicrokernelTester()
3796 .mr(4)
3797 .nr(4)
3798 .kr(2)
3799 .sr(1)
3800 .m(4)
3801 .n(4)
3802 .k(k)
3803 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3804 }
3805 }
3806
3807 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_div_8_subtile) {
3808 TEST_REQUIRES_X86_SSE2;
3809 for (size_t k = 16; k <= 80; k += 8) {
3810 for (uint32_t m = 1; m <= 4; m++) {
3811 for (uint32_t n = 1; n <= 4; n++) {
3812 GemmMicrokernelTester()
3813 .mr(4)
3814 .nr(4)
3815 .kr(2)
3816 .sr(1)
3817 .m(m)
3818 .n(n)
3819 .k(k)
3820 .iterations(1)
3821 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3822 }
3823 }
3824 }
3825 }
3826
3827 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4) {
3828 TEST_REQUIRES_X86_SSE2;
3829 for (uint32_t n = 5; n < 8; n++) {
3830 for (size_t k = 1; k <= 40; k += 9) {
3831 GemmMicrokernelTester()
3832 .mr(4)
3833 .nr(4)
3834 .kr(2)
3835 .sr(1)
3836 .m(4)
3837 .n(4)
3838 .k(k)
3839 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3840 }
3841 }
3842 }
3843
3844 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4_strided_cn) {
3845 TEST_REQUIRES_X86_SSE2;
3846 for (uint32_t n = 5; n < 8; n++) {
3847 for (size_t k = 1; k <= 40; k += 9) {
3848 GemmMicrokernelTester()
3849 .mr(4)
3850 .nr(4)
3851 .kr(2)
3852 .sr(1)
3853 .m(4)
3854 .n(4)
3855 .k(k)
3856 .cn_stride(7)
3857 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3858 }
3859 }
3860 }
3861
3862 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4_subtile) {
3863 TEST_REQUIRES_X86_SSE2;
3864 for (uint32_t n = 5; n < 8; n++) {
3865 for (size_t k = 1; k <= 40; k += 9) {
3866 for (uint32_t m = 1; m <= 4; m++) {
3867 GemmMicrokernelTester()
3868 .mr(4)
3869 .nr(4)
3870 .kr(2)
3871 .sr(1)
3872 .m(m)
3873 .n(n)
3874 .k(k)
3875 .iterations(1)
3876 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3877 }
3878 }
3879 }
3880 }
3881
3882 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4) {
3883 TEST_REQUIRES_X86_SSE2;
3884 for (uint32_t n = 8; n <= 12; n += 4) {
3885 for (size_t k = 1; k <= 40; k += 9) {
3886 GemmMicrokernelTester()
3887 .mr(4)
3888 .nr(4)
3889 .kr(2)
3890 .sr(1)
3891 .m(4)
3892 .n(4)
3893 .k(k)
3894 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3895 }
3896 }
3897 }
3898
3899 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4_strided_cn) {
3900 TEST_REQUIRES_X86_SSE2;
3901 for (uint32_t n = 8; n <= 12; n += 4) {
3902 for (size_t k = 1; k <= 40; k += 9) {
3903 GemmMicrokernelTester()
3904 .mr(4)
3905 .nr(4)
3906 .kr(2)
3907 .sr(1)
3908 .m(4)
3909 .n(n)
3910 .k(k)
3911 .cn_stride(7)
3912 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3913 }
3914 }
3915 }
3916
3917 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4_subtile) {
3918 TEST_REQUIRES_X86_SSE2;
3919 for (uint32_t n = 8; n <= 12; n += 4) {
3920 for (size_t k = 1; k <= 40; k += 9) {
3921 for (uint32_t m = 1; m <= 4; m++) {
3922 GemmMicrokernelTester()
3923 .mr(4)
3924 .nr(4)
3925 .kr(2)
3926 .sr(1)
3927 .m(m)
3928 .n(n)
3929 .k(k)
3930 .iterations(1)
3931 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3932 }
3933 }
3934 }
3935 }
3936
3937 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, small_kernel) {
3938 TEST_REQUIRES_X86_SSE2;
3939 for (size_t k = 1; k <= 40; k += 9) {
3940 GemmMicrokernelTester()
3941 .mr(4)
3942 .nr(4)
3943 .kr(2)
3944 .sr(1)
3945 .m(4)
3946 .n(4)
3947 .k(k)
3948 .ks(3)
3949 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3950 }
3951 }
3952
3953 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, small_kernel_subtile) {
3954 TEST_REQUIRES_X86_SSE2;
3955 for (size_t k = 1; k <= 40; k += 9) {
3956 for (uint32_t m = 1; m <= 4; m++) {
3957 for (uint32_t n = 1; n <= 4; n++) {
3958 GemmMicrokernelTester()
3959 .mr(4)
3960 .nr(4)
3961 .kr(2)
3962 .sr(1)
3963 .m(m)
3964 .n(n)
3965 .k(k)
3966 .ks(3)
3967 .iterations(1)
3968 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3969 }
3970 }
3971 }
3972 }
3973
3974 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4_small_kernel) {
3975 TEST_REQUIRES_X86_SSE2;
3976 for (uint32_t n = 5; n < 8; n++) {
3977 for (size_t k = 1; k <= 40; k += 9) {
3978 GemmMicrokernelTester()
3979 .mr(4)
3980 .nr(4)
3981 .kr(2)
3982 .sr(1)
3983 .m(4)
3984 .n(4)
3985 .k(k)
3986 .ks(3)
3987 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
3988 }
3989 }
3990 }
3991
3992 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4_small_kernel) {
3993 TEST_REQUIRES_X86_SSE2;
3994 for (uint32_t n = 8; n <= 12; n += 4) {
3995 for (size_t k = 1; k <= 40; k += 9) {
3996 GemmMicrokernelTester()
3997 .mr(4)
3998 .nr(4)
3999 .kr(2)
4000 .sr(1)
4001 .m(4)
4002 .n(4)
4003 .k(k)
4004 .ks(3)
4005 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4006 }
4007 }
4008 }
4009
4010 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, strided_cm_subtile) {
4011 TEST_REQUIRES_X86_SSE2;
4012 for (size_t k = 1; k <= 40; k += 9) {
4013 for (uint32_t m = 1; m <= 4; m++) {
4014 for (uint32_t n = 1; n <= 4; n++) {
4015 GemmMicrokernelTester()
4016 .mr(4)
4017 .nr(4)
4018 .kr(2)
4019 .sr(1)
4020 .m(m)
4021 .n(n)
4022 .k(k)
4023 .cm_stride(7)
4024 .iterations(1)
4025 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4026 }
4027 }
4028 }
4029 }
4030
4031 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, a_offset) {
4032 TEST_REQUIRES_X86_SSE2;
4033 for (size_t k = 1; k <= 40; k += 9) {
4034 GemmMicrokernelTester()
4035 .mr(4)
4036 .nr(4)
4037 .kr(2)
4038 .sr(1)
4039 .m(4)
4040 .n(4)
4041 .k(k)
4042 .ks(3)
4043 .a_offset(163)
4044 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4045 }
4046 }
4047
4048 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, zero) {
4049 TEST_REQUIRES_X86_SSE2;
4050 for (uint32_t mz = 0; mz < 4; mz++) {
4051 for (size_t k = 1; k <= 40; k += 9) {
4052 GemmMicrokernelTester()
4053 .mr(4)
4054 .nr(4)
4055 .kr(2)
4056 .sr(1)
4057 .m(4)
4058 .n(4)
4059 .k(k)
4060 .ks(3)
4061 .a_offset(163)
4062 .zero_index(mz)
4063 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4064 }
4065 }
4066 }
4067
4068 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, qmin) {
4069 TEST_REQUIRES_X86_SSE2;
4070 GemmMicrokernelTester()
4071 .mr(4)
4072 .nr(4)
4073 .kr(2)
4074 .sr(1)
4075 .m(4)
4076 .n(4)
4077 .k(8)
4078 .qmin(128)
4079 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4080 }
4081
4082 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, qmax) {
4083 TEST_REQUIRES_X86_SSE2;
4084 GemmMicrokernelTester()
4085 .mr(4)
4086 .nr(4)
4087 .kr(2)
4088 .sr(1)
4089 .m(4)
4090 .n(4)
4091 .k(8)
4092 .qmax(128)
4093 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4094 }
4095
4096 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, strided_cm) {
4097 TEST_REQUIRES_X86_SSE2;
4098 GemmMicrokernelTester()
4099 .mr(4)
4100 .nr(4)
4101 .kr(2)
4102 .sr(1)
4103 .m(4)
4104 .n(4)
4105 .k(8)
4106 .cm_stride(7)
4107 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4108 }
4109
4110 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, no_a_zero_point) {
4111 TEST_REQUIRES_X86_SSE2;
4112 for (size_t k = 1; k <= 40; k += 9) {
4113 GemmMicrokernelTester()
4114 .mr(4)
4115 .nr(4)
4116 .kr(2)
4117 .sr(1)
4118 .m(4)
4119 .n(4)
4120 .k(k)
4121 .a_zero_point(0)
4122 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4123 }
4124 }
4125
4126 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, no_b_zero_point) {
4127 TEST_REQUIRES_X86_SSE2;
4128 for (size_t k = 1; k <= 40; k += 9) {
4129 GemmMicrokernelTester()
4130 .mr(4)
4131 .nr(4)
4132 .kr(2)
4133 .sr(1)
4134 .m(4)
4135 .n(4)
4136 .k(k)
4137 .b_zero_point(0)
4138 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4139 }
4140 }
4141
4142 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, no_zero_point) {
4143 TEST_REQUIRES_X86_SSE2;
4144 for (size_t k = 1; k <= 40; k += 9) {
4145 GemmMicrokernelTester()
4146 .mr(4)
4147 .nr(4)
4148 .kr(2)
4149 .sr(1)
4150 .m(4)
4151 .n(4)
4152 .k(k)
4153 .a_zero_point(0)
4154 .b_zero_point(0)
4155 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4156 }
4157 }
4158#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4159
4160
4161#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4162 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8) {
4163 TEST_REQUIRES_X86_SSE41;
4164 GemmMicrokernelTester()
4165 .mr(1)
4166 .nr(4)
4167 .kr(2)
4168 .sr(1)
4169 .m(1)
4170 .n(4)
4171 .k(8)
4172 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4173 }
4174
4175 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, strided_cn) {
4176 TEST_REQUIRES_X86_SSE41;
4177 GemmMicrokernelTester()
4178 .mr(1)
4179 .nr(4)
4180 .kr(2)
4181 .sr(1)
4182 .m(1)
4183 .n(4)
4184 .k(8)
4185 .cn_stride(7)
4186 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4187 }
4188
4189 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_subtile) {
4190 TEST_REQUIRES_X86_SSE41;
4191 for (uint32_t m = 1; m <= 1; m++) {
4192 for (uint32_t n = 1; n <= 4; n++) {
4193 GemmMicrokernelTester()
4194 .mr(1)
4195 .nr(4)
4196 .kr(2)
4197 .sr(1)
4198 .m(m)
4199 .n(n)
4200 .k(8)
4201 .iterations(1)
4202 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4203 }
4204 }
4205 }
4206
4207 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_subtile_m) {
4208 TEST_REQUIRES_X86_SSE41;
4209 for (uint32_t m = 1; m <= 1; m++) {
4210 GemmMicrokernelTester()
4211 .mr(1)
4212 .nr(4)
4213 .kr(2)
4214 .sr(1)
4215 .m(m)
4216 .n(4)
4217 .k(8)
4218 .iterations(1)
4219 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4220 }
4221 }
4222
4223 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_subtile_n) {
4224 TEST_REQUIRES_X86_SSE41;
4225 for (uint32_t n = 1; n <= 4; n++) {
4226 GemmMicrokernelTester()
4227 .mr(1)
4228 .nr(4)
4229 .kr(2)
4230 .sr(1)
4231 .m(1)
4232 .n(n)
4233 .k(8)
4234 .iterations(1)
4235 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4236 }
4237 }
4238
4239 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_lt_8) {
4240 TEST_REQUIRES_X86_SSE41;
4241 for (size_t k = 1; k < 8; k++) {
4242 GemmMicrokernelTester()
4243 .mr(1)
4244 .nr(4)
4245 .kr(2)
4246 .sr(1)
4247 .m(1)
4248 .n(4)
4249 .k(k)
4250 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4251 }
4252 }
4253
4254 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_lt_8_subtile) {
4255 TEST_REQUIRES_X86_SSE41;
4256 for (size_t k = 1; k < 8; k++) {
4257 for (uint32_t m = 1; m <= 1; m++) {
4258 for (uint32_t n = 1; n <= 4; n++) {
4259 GemmMicrokernelTester()
4260 .mr(1)
4261 .nr(4)
4262 .kr(2)
4263 .sr(1)
4264 .m(m)
4265 .n(n)
4266 .k(k)
4267 .iterations(1)
4268 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4269 }
4270 }
4271 }
4272 }
4273
4274 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_gt_8) {
4275 TEST_REQUIRES_X86_SSE41;
4276 for (size_t k = 9; k < 16; k++) {
4277 GemmMicrokernelTester()
4278 .mr(1)
4279 .nr(4)
4280 .kr(2)
4281 .sr(1)
4282 .m(1)
4283 .n(4)
4284 .k(k)
4285 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4286 }
4287 }
4288
4289 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_gt_8_subtile) {
4290 TEST_REQUIRES_X86_SSE41;
4291 for (size_t k = 9; k < 16; k++) {
4292 for (uint32_t m = 1; m <= 1; m++) {
4293 for (uint32_t n = 1; n <= 4; n++) {
4294 GemmMicrokernelTester()
4295 .mr(1)
4296 .nr(4)
4297 .kr(2)
4298 .sr(1)
4299 .m(m)
4300 .n(n)
4301 .k(k)
4302 .iterations(1)
4303 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4304 }
4305 }
4306 }
4307 }
4308
4309 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_div_8) {
4310 TEST_REQUIRES_X86_SSE41;
4311 for (size_t k = 16; k <= 80; k += 8) {
4312 GemmMicrokernelTester()
4313 .mr(1)
4314 .nr(4)
4315 .kr(2)
4316 .sr(1)
4317 .m(1)
4318 .n(4)
4319 .k(k)
4320 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4321 }
4322 }
4323
4324 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_div_8_subtile) {
4325 TEST_REQUIRES_X86_SSE41;
4326 for (size_t k = 16; k <= 80; k += 8) {
4327 for (uint32_t m = 1; m <= 1; m++) {
4328 for (uint32_t n = 1; n <= 4; n++) {
4329 GemmMicrokernelTester()
4330 .mr(1)
4331 .nr(4)
4332 .kr(2)
4333 .sr(1)
4334 .m(m)
4335 .n(n)
4336 .k(k)
4337 .iterations(1)
4338 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4339 }
4340 }
4341 }
4342 }
4343
4344 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4) {
4345 TEST_REQUIRES_X86_SSE41;
4346 for (uint32_t n = 5; n < 8; n++) {
4347 for (size_t k = 1; k <= 40; k += 9) {
4348 GemmMicrokernelTester()
4349 .mr(1)
4350 .nr(4)
4351 .kr(2)
4352 .sr(1)
4353 .m(1)
4354 .n(4)
4355 .k(k)
4356 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4357 }
4358 }
4359 }
4360
4361 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4_strided_cn) {
4362 TEST_REQUIRES_X86_SSE41;
4363 for (uint32_t n = 5; n < 8; n++) {
4364 for (size_t k = 1; k <= 40; k += 9) {
4365 GemmMicrokernelTester()
4366 .mr(1)
4367 .nr(4)
4368 .kr(2)
4369 .sr(1)
4370 .m(1)
4371 .n(4)
4372 .k(k)
4373 .cn_stride(7)
4374 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4375 }
4376 }
4377 }
4378
4379 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4_subtile) {
4380 TEST_REQUIRES_X86_SSE41;
4381 for (uint32_t n = 5; n < 8; n++) {
4382 for (size_t k = 1; k <= 40; k += 9) {
4383 for (uint32_t m = 1; m <= 1; m++) {
4384 GemmMicrokernelTester()
4385 .mr(1)
4386 .nr(4)
4387 .kr(2)
4388 .sr(1)
4389 .m(m)
4390 .n(n)
4391 .k(k)
4392 .iterations(1)
4393 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4394 }
4395 }
4396 }
4397 }
4398
4399 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4) {
4400 TEST_REQUIRES_X86_SSE41;
4401 for (uint32_t n = 8; n <= 12; n += 4) {
4402 for (size_t k = 1; k <= 40; k += 9) {
4403 GemmMicrokernelTester()
4404 .mr(1)
4405 .nr(4)
4406 .kr(2)
4407 .sr(1)
4408 .m(1)
4409 .n(4)
4410 .k(k)
4411 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4412 }
4413 }
4414 }
4415
4416 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4_strided_cn) {
4417 TEST_REQUIRES_X86_SSE41;
4418 for (uint32_t n = 8; n <= 12; n += 4) {
4419 for (size_t k = 1; k <= 40; k += 9) {
4420 GemmMicrokernelTester()
4421 .mr(1)
4422 .nr(4)
4423 .kr(2)
4424 .sr(1)
4425 .m(1)
4426 .n(n)
4427 .k(k)
4428 .cn_stride(7)
4429 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4430 }
4431 }
4432 }
4433
4434 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4_subtile) {
4435 TEST_REQUIRES_X86_SSE41;
4436 for (uint32_t n = 8; n <= 12; n += 4) {
4437 for (size_t k = 1; k <= 40; k += 9) {
4438 for (uint32_t m = 1; m <= 1; m++) {
4439 GemmMicrokernelTester()
4440 .mr(1)
4441 .nr(4)
4442 .kr(2)
4443 .sr(1)
4444 .m(m)
4445 .n(n)
4446 .k(k)
4447 .iterations(1)
4448 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4449 }
4450 }
4451 }
4452 }
4453
4454 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, small_kernel) {
4455 TEST_REQUIRES_X86_SSE41;
4456 for (size_t k = 1; k <= 40; k += 9) {
4457 GemmMicrokernelTester()
4458 .mr(1)
4459 .nr(4)
4460 .kr(2)
4461 .sr(1)
4462 .m(1)
4463 .n(4)
4464 .k(k)
4465 .ks(3)
4466 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4467 }
4468 }
4469
4470 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, small_kernel_subtile) {
4471 TEST_REQUIRES_X86_SSE41;
4472 for (size_t k = 1; k <= 40; k += 9) {
4473 for (uint32_t m = 1; m <= 1; m++) {
4474 for (uint32_t n = 1; n <= 4; n++) {
4475 GemmMicrokernelTester()
4476 .mr(1)
4477 .nr(4)
4478 .kr(2)
4479 .sr(1)
4480 .m(m)
4481 .n(n)
4482 .k(k)
4483 .ks(3)
4484 .iterations(1)
4485 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4486 }
4487 }
4488 }
4489 }
4490
4491 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4_small_kernel) {
4492 TEST_REQUIRES_X86_SSE41;
4493 for (uint32_t n = 5; n < 8; n++) {
4494 for (size_t k = 1; k <= 40; k += 9) {
4495 GemmMicrokernelTester()
4496 .mr(1)
4497 .nr(4)
4498 .kr(2)
4499 .sr(1)
4500 .m(1)
4501 .n(4)
4502 .k(k)
4503 .ks(3)
4504 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4505 }
4506 }
4507 }
4508
4509 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4_small_kernel) {
4510 TEST_REQUIRES_X86_SSE41;
4511 for (uint32_t n = 8; n <= 12; n += 4) {
4512 for (size_t k = 1; k <= 40; k += 9) {
4513 GemmMicrokernelTester()
4514 .mr(1)
4515 .nr(4)
4516 .kr(2)
4517 .sr(1)
4518 .m(1)
4519 .n(4)
4520 .k(k)
4521 .ks(3)
4522 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4523 }
4524 }
4525 }
4526
4527 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, strided_cm_subtile) {
4528 TEST_REQUIRES_X86_SSE41;
4529 for (size_t k = 1; k <= 40; k += 9) {
4530 for (uint32_t m = 1; m <= 1; m++) {
4531 for (uint32_t n = 1; n <= 4; n++) {
4532 GemmMicrokernelTester()
4533 .mr(1)
4534 .nr(4)
4535 .kr(2)
4536 .sr(1)
4537 .m(m)
4538 .n(n)
4539 .k(k)
4540 .cm_stride(7)
4541 .iterations(1)
4542 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4543 }
4544 }
4545 }
4546 }
4547
4548 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, a_offset) {
4549 TEST_REQUIRES_X86_SSE41;
4550 for (size_t k = 1; k <= 40; k += 9) {
4551 GemmMicrokernelTester()
4552 .mr(1)
4553 .nr(4)
4554 .kr(2)
4555 .sr(1)
4556 .m(1)
4557 .n(4)
4558 .k(k)
4559 .ks(3)
4560 .a_offset(43)
4561 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4562 }
4563 }
4564
4565 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, zero) {
4566 TEST_REQUIRES_X86_SSE41;
4567 for (uint32_t mz = 0; mz < 1; mz++) {
4568 for (size_t k = 1; k <= 40; k += 9) {
4569 GemmMicrokernelTester()
4570 .mr(1)
4571 .nr(4)
4572 .kr(2)
4573 .sr(1)
4574 .m(1)
4575 .n(4)
4576 .k(k)
4577 .ks(3)
4578 .a_offset(43)
4579 .zero_index(mz)
4580 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4581 }
4582 }
4583 }
4584
4585 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, qmin) {
4586 TEST_REQUIRES_X86_SSE41;
4587 GemmMicrokernelTester()
4588 .mr(1)
4589 .nr(4)
4590 .kr(2)
4591 .sr(1)
4592 .m(1)
4593 .n(4)
4594 .k(8)
4595 .qmin(128)
4596 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4597 }
4598
4599 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, qmax) {
4600 TEST_REQUIRES_X86_SSE41;
4601 GemmMicrokernelTester()
4602 .mr(1)
4603 .nr(4)
4604 .kr(2)
4605 .sr(1)
4606 .m(1)
4607 .n(4)
4608 .k(8)
4609 .qmax(128)
4610 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4611 }
4612
4613 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, strided_cm) {
4614 TEST_REQUIRES_X86_SSE41;
4615 GemmMicrokernelTester()
4616 .mr(1)
4617 .nr(4)
4618 .kr(2)
4619 .sr(1)
4620 .m(1)
4621 .n(4)
4622 .k(8)
4623 .cm_stride(7)
4624 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4625 }
4626
4627 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, no_a_zero_point) {
4628 TEST_REQUIRES_X86_SSE41;
4629 for (size_t k = 1; k <= 40; k += 9) {
4630 GemmMicrokernelTester()
4631 .mr(1)
4632 .nr(4)
4633 .kr(2)
4634 .sr(1)
4635 .m(1)
4636 .n(4)
4637 .k(k)
4638 .a_zero_point(0)
4639 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4640 }
4641 }
4642
4643 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, no_b_zero_point) {
4644 TEST_REQUIRES_X86_SSE41;
4645 for (size_t k = 1; k <= 40; k += 9) {
4646 GemmMicrokernelTester()
4647 .mr(1)
4648 .nr(4)
4649 .kr(2)
4650 .sr(1)
4651 .m(1)
4652 .n(4)
4653 .k(k)
4654 .b_zero_point(0)
4655 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4656 }
4657 }
4658
4659 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, no_zero_point) {
4660 TEST_REQUIRES_X86_SSE41;
4661 for (size_t k = 1; k <= 40; k += 9) {
4662 GemmMicrokernelTester()
4663 .mr(1)
4664 .nr(4)
4665 .kr(2)
4666 .sr(1)
4667 .m(1)
4668 .n(4)
4669 .k(k)
4670 .a_zero_point(0)
4671 .b_zero_point(0)
4672 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4673 }
4674 }
4675#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4676
4677
4678#if XNN_ARCH_X86 || XNN_ARCH_X86_64
4679 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8) {
4680 TEST_REQUIRES_X86_SSE41;
4681 GemmMicrokernelTester()
4682 .mr(2)
4683 .nr(4)
4684 .kr(2)
4685 .sr(1)
4686 .m(2)
4687 .n(4)
4688 .k(8)
4689 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4690 }
4691
4692 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, strided_cn) {
4693 TEST_REQUIRES_X86_SSE41;
4694 GemmMicrokernelTester()
4695 .mr(2)
4696 .nr(4)
4697 .kr(2)
4698 .sr(1)
4699 .m(2)
4700 .n(4)
4701 .k(8)
4702 .cn_stride(7)
4703 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4704 }
4705
4706 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8_subtile) {
4707 TEST_REQUIRES_X86_SSE41;
4708 for (uint32_t m = 1; m <= 2; m++) {
4709 for (uint32_t n = 1; n <= 4; n++) {
4710 GemmMicrokernelTester()
4711 .mr(2)
4712 .nr(4)
4713 .kr(2)
4714 .sr(1)
4715 .m(m)
4716 .n(n)
4717 .k(8)
4718 .iterations(1)
4719 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4720 }
4721 }
4722 }
4723
4724 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8_subtile_m) {
4725 TEST_REQUIRES_X86_SSE41;
4726 for (uint32_t m = 1; m <= 2; m++) {
4727 GemmMicrokernelTester()
4728 .mr(2)
4729 .nr(4)
4730 .kr(2)
4731 .sr(1)
4732 .m(m)
4733 .n(4)
4734 .k(8)
4735 .iterations(1)
4736 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4737 }
4738 }
4739
4740 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8_subtile_n) {
4741 TEST_REQUIRES_X86_SSE41;
4742 for (uint32_t n = 1; n <= 4; n++) {
4743 GemmMicrokernelTester()
4744 .mr(2)
4745 .nr(4)
4746 .kr(2)
4747 .sr(1)
4748 .m(2)
4749 .n(n)
4750 .k(8)
4751 .iterations(1)
4752 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4753 }
4754 }
4755
4756 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_lt_8) {
4757 TEST_REQUIRES_X86_SSE41;
4758 for (size_t k = 1; k < 8; k++) {
4759 GemmMicrokernelTester()
4760 .mr(2)
4761 .nr(4)
4762 .kr(2)
4763 .sr(1)
4764 .m(2)
4765 .n(4)
4766 .k(k)
4767 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4768 }
4769 }
4770
4771 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_lt_8_subtile) {
4772 TEST_REQUIRES_X86_SSE41;
4773 for (size_t k = 1; k < 8; k++) {
4774 for (uint32_t m = 1; m <= 2; m++) {
4775 for (uint32_t n = 1; n <= 4; n++) {
4776 GemmMicrokernelTester()
4777 .mr(2)
4778 .nr(4)
4779 .kr(2)
4780 .sr(1)
4781 .m(m)
4782 .n(n)
4783 .k(k)
4784 .iterations(1)
4785 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4786 }
4787 }
4788 }
4789 }
4790
4791 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_gt_8) {
4792 TEST_REQUIRES_X86_SSE41;
4793 for (size_t k = 9; k < 16; k++) {
4794 GemmMicrokernelTester()
4795 .mr(2)
4796 .nr(4)
4797 .kr(2)
4798 .sr(1)
4799 .m(2)
4800 .n(4)
4801 .k(k)
4802 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4803 }
4804 }
4805
4806 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_gt_8_subtile) {
4807 TEST_REQUIRES_X86_SSE41;
4808 for (size_t k = 9; k < 16; k++) {
4809 for (uint32_t m = 1; m <= 2; m++) {
4810 for (uint32_t n = 1; n <= 4; n++) {
4811 GemmMicrokernelTester()
4812 .mr(2)
4813 .nr(4)
4814 .kr(2)
4815 .sr(1)
4816 .m(m)
4817 .n(n)
4818 .k(k)
4819 .iterations(1)
4820 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4821 }
4822 }
4823 }
4824 }
4825
4826 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_div_8) {
4827 TEST_REQUIRES_X86_SSE41;
4828 for (size_t k = 16; k <= 80; k += 8) {
4829 GemmMicrokernelTester()
4830 .mr(2)
4831 .nr(4)
4832 .kr(2)
4833 .sr(1)
4834 .m(2)
4835 .n(4)
4836 .k(k)
4837 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4838 }
4839 }
4840
4841 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_div_8_subtile) {
4842 TEST_REQUIRES_X86_SSE41;
4843 for (size_t k = 16; k <= 80; k += 8) {
4844 for (uint32_t m = 1; m <= 2; m++) {
4845 for (uint32_t n = 1; n <= 4; n++) {
4846 GemmMicrokernelTester()
4847 .mr(2)
4848 .nr(4)
4849 .kr(2)
4850 .sr(1)
4851 .m(m)
4852 .n(n)
4853 .k(k)
4854 .iterations(1)
4855 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4856 }
4857 }
4858 }
4859 }
4860
4861 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4) {
4862 TEST_REQUIRES_X86_SSE41;
4863 for (uint32_t n = 5; n < 8; n++) {
4864 for (size_t k = 1; k <= 40; k += 9) {
4865 GemmMicrokernelTester()
4866 .mr(2)
4867 .nr(4)
4868 .kr(2)
4869 .sr(1)
4870 .m(2)
4871 .n(4)
4872 .k(k)
4873 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4874 }
4875 }
4876 }
4877
4878 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4_strided_cn) {
4879 TEST_REQUIRES_X86_SSE41;
4880 for (uint32_t n = 5; n < 8; n++) {
4881 for (size_t k = 1; k <= 40; k += 9) {
4882 GemmMicrokernelTester()
4883 .mr(2)
4884 .nr(4)
4885 .kr(2)
4886 .sr(1)
4887 .m(2)
4888 .n(4)
4889 .k(k)
4890 .cn_stride(7)
4891 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4892 }
4893 }
4894 }
4895
4896 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4_subtile) {
4897 TEST_REQUIRES_X86_SSE41;
4898 for (uint32_t n = 5; n < 8; n++) {
4899 for (size_t k = 1; k <= 40; k += 9) {
4900 for (uint32_t m = 1; m <= 2; m++) {
4901 GemmMicrokernelTester()
4902 .mr(2)
4903 .nr(4)
4904 .kr(2)
4905 .sr(1)
4906 .m(m)
4907 .n(n)
4908 .k(k)
4909 .iterations(1)
4910 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4911 }
4912 }
4913 }
4914 }
4915
4916 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4) {
4917 TEST_REQUIRES_X86_SSE41;
4918 for (uint32_t n = 8; n <= 12; n += 4) {
4919 for (size_t k = 1; k <= 40; k += 9) {
4920 GemmMicrokernelTester()
4921 .mr(2)
4922 .nr(4)
4923 .kr(2)
4924 .sr(1)
4925 .m(2)
4926 .n(4)
4927 .k(k)
4928 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4929 }
4930 }
4931 }
4932
4933 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4_strided_cn) {
4934 TEST_REQUIRES_X86_SSE41;
4935 for (uint32_t n = 8; n <= 12; n += 4) {
4936 for (size_t k = 1; k <= 40; k += 9) {
4937 GemmMicrokernelTester()
4938 .mr(2)
4939 .nr(4)
4940 .kr(2)
4941 .sr(1)
4942 .m(2)
4943 .n(n)
4944 .k(k)
4945 .cn_stride(7)
4946 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4947 }
4948 }
4949 }
4950
4951 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4_subtile) {
4952 TEST_REQUIRES_X86_SSE41;
4953 for (uint32_t n = 8; n <= 12; n += 4) {
4954 for (size_t k = 1; k <= 40; k += 9) {
4955 for (uint32_t m = 1; m <= 2; m++) {
4956 GemmMicrokernelTester()
4957 .mr(2)
4958 .nr(4)
4959 .kr(2)
4960 .sr(1)
4961 .m(m)
4962 .n(n)
4963 .k(k)
4964 .iterations(1)
4965 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4966 }
4967 }
4968 }
4969 }
4970
4971 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, small_kernel) {
4972 TEST_REQUIRES_X86_SSE41;
4973 for (size_t k = 1; k <= 40; k += 9) {
4974 GemmMicrokernelTester()
4975 .mr(2)
4976 .nr(4)
4977 .kr(2)
4978 .sr(1)
4979 .m(2)
4980 .n(4)
4981 .k(k)
4982 .ks(3)
4983 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
4984 }
4985 }
4986
4987 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, small_kernel_subtile) {
4988 TEST_REQUIRES_X86_SSE41;
4989 for (size_t k = 1; k <= 40; k += 9) {
4990 for (uint32_t m = 1; m <= 2; m++) {
4991 for (uint32_t n = 1; n <= 4; n++) {
4992 GemmMicrokernelTester()
4993 .mr(2)
4994 .nr(4)
4995 .kr(2)
4996 .sr(1)
4997 .m(m)
4998 .n(n)
4999 .k(k)
5000 .ks(3)
5001 .iterations(1)
5002 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5003 }
5004 }
5005 }
5006 }
5007
5008 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4_small_kernel) {
5009 TEST_REQUIRES_X86_SSE41;
5010 for (uint32_t n = 5; n < 8; n++) {
5011 for (size_t k = 1; k <= 40; k += 9) {
5012 GemmMicrokernelTester()
5013 .mr(2)
5014 .nr(4)
5015 .kr(2)
5016 .sr(1)
5017 .m(2)
5018 .n(4)
5019 .k(k)
5020 .ks(3)
5021 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5022 }
5023 }
5024 }
5025
5026 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4_small_kernel) {
5027 TEST_REQUIRES_X86_SSE41;
5028 for (uint32_t n = 8; n <= 12; n += 4) {
5029 for (size_t k = 1; k <= 40; k += 9) {
5030 GemmMicrokernelTester()
5031 .mr(2)
5032 .nr(4)
5033 .kr(2)
5034 .sr(1)
5035 .m(2)
5036 .n(4)
5037 .k(k)
5038 .ks(3)
5039 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5040 }
5041 }
5042 }
5043
5044 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, strided_cm_subtile) {
5045 TEST_REQUIRES_X86_SSE41;
5046 for (size_t k = 1; k <= 40; k += 9) {
5047 for (uint32_t m = 1; m <= 2; m++) {
5048 for (uint32_t n = 1; n <= 4; n++) {
5049 GemmMicrokernelTester()
5050 .mr(2)
5051 .nr(4)
5052 .kr(2)
5053 .sr(1)
5054 .m(m)
5055 .n(n)
5056 .k(k)
5057 .cm_stride(7)
5058 .iterations(1)
5059 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5060 }
5061 }
5062 }
5063 }
5064
5065 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, a_offset) {
5066 TEST_REQUIRES_X86_SSE41;
5067 for (size_t k = 1; k <= 40; k += 9) {
5068 GemmMicrokernelTester()
5069 .mr(2)
5070 .nr(4)
5071 .kr(2)
5072 .sr(1)
5073 .m(2)
5074 .n(4)
5075 .k(k)
5076 .ks(3)
5077 .a_offset(83)
5078 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5079 }
5080 }
5081
5082 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, zero) {
5083 TEST_REQUIRES_X86_SSE41;
5084 for (uint32_t mz = 0; mz < 2; mz++) {
5085 for (size_t k = 1; k <= 40; k += 9) {
5086 GemmMicrokernelTester()
5087 .mr(2)
5088 .nr(4)
5089 .kr(2)
5090 .sr(1)
5091 .m(2)
5092 .n(4)
5093 .k(k)
5094 .ks(3)
5095 .a_offset(83)
5096 .zero_index(mz)
5097 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5098 }
5099 }
5100 }
5101
5102 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, qmin) {
5103 TEST_REQUIRES_X86_SSE41;
5104 GemmMicrokernelTester()
5105 .mr(2)
5106 .nr(4)
5107 .kr(2)
5108 .sr(1)
5109 .m(2)
5110 .n(4)
5111 .k(8)
5112 .qmin(128)
5113 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5114 }
5115
5116 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, qmax) {
5117 TEST_REQUIRES_X86_SSE41;
5118 GemmMicrokernelTester()
5119 .mr(2)
5120 .nr(4)
5121 .kr(2)
5122 .sr(1)
5123 .m(2)
5124 .n(4)
5125 .k(8)
5126 .qmax(128)
5127 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5128 }
5129
5130 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, strided_cm) {
5131 TEST_REQUIRES_X86_SSE41;
5132 GemmMicrokernelTester()
5133 .mr(2)
5134 .nr(4)
5135 .kr(2)
5136 .sr(1)
5137 .m(2)
5138 .n(4)
5139 .k(8)
5140 .cm_stride(7)
5141 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5142 }
5143
5144 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, no_a_zero_point) {
5145 TEST_REQUIRES_X86_SSE41;
5146 for (size_t k = 1; k <= 40; k += 9) {
5147 GemmMicrokernelTester()
5148 .mr(2)
5149 .nr(4)
5150 .kr(2)
5151 .sr(1)
5152 .m(2)
5153 .n(4)
5154 .k(k)
5155 .a_zero_point(0)
5156 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5157 }
5158 }
5159
5160 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, no_b_zero_point) {
5161 TEST_REQUIRES_X86_SSE41;
5162 for (size_t k = 1; k <= 40; k += 9) {
5163 GemmMicrokernelTester()
5164 .mr(2)
5165 .nr(4)
5166 .kr(2)
5167 .sr(1)
5168 .m(2)
5169 .n(4)
5170 .k(k)
5171 .b_zero_point(0)
5172 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5173 }
5174 }
5175
5176 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, no_zero_point) {
5177 TEST_REQUIRES_X86_SSE41;
5178 for (size_t k = 1; k <= 40; k += 9) {
5179 GemmMicrokernelTester()
5180 .mr(2)
5181 .nr(4)
5182 .kr(2)
5183 .sr(1)
5184 .m(2)
5185 .n(4)
5186 .k(k)
5187 .a_zero_point(0)
5188 .b_zero_point(0)
5189 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5190 }
5191 }
5192#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5193
5194
5195#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5196 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8) {
5197 TEST_REQUIRES_X86_SSE41;
5198 GemmMicrokernelTester()
5199 .mr(3)
5200 .nr(4)
5201 .kr(2)
5202 .sr(1)
5203 .m(3)
5204 .n(4)
5205 .k(8)
5206 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5207 }
5208
5209 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cn) {
5210 TEST_REQUIRES_X86_SSE41;
5211 GemmMicrokernelTester()
5212 .mr(3)
5213 .nr(4)
5214 .kr(2)
5215 .sr(1)
5216 .m(3)
5217 .n(4)
5218 .k(8)
5219 .cn_stride(7)
5220 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5221 }
5222
5223 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile) {
5224 TEST_REQUIRES_X86_SSE41;
5225 for (uint32_t m = 1; m <= 3; m++) {
5226 for (uint32_t n = 1; n <= 4; n++) {
5227 GemmMicrokernelTester()
5228 .mr(3)
5229 .nr(4)
5230 .kr(2)
5231 .sr(1)
5232 .m(m)
5233 .n(n)
5234 .k(8)
5235 .iterations(1)
5236 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5237 }
5238 }
5239 }
5240
5241 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile_m) {
5242 TEST_REQUIRES_X86_SSE41;
5243 for (uint32_t m = 1; m <= 3; m++) {
5244 GemmMicrokernelTester()
5245 .mr(3)
5246 .nr(4)
5247 .kr(2)
5248 .sr(1)
5249 .m(m)
5250 .n(4)
5251 .k(8)
5252 .iterations(1)
5253 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5254 }
5255 }
5256
5257 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile_n) {
5258 TEST_REQUIRES_X86_SSE41;
5259 for (uint32_t n = 1; n <= 4; n++) {
5260 GemmMicrokernelTester()
5261 .mr(3)
5262 .nr(4)
5263 .kr(2)
5264 .sr(1)
5265 .m(3)
5266 .n(n)
5267 .k(8)
5268 .iterations(1)
5269 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5270 }
5271 }
5272
5273 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_lt_8) {
5274 TEST_REQUIRES_X86_SSE41;
5275 for (size_t k = 1; k < 8; k++) {
5276 GemmMicrokernelTester()
5277 .mr(3)
5278 .nr(4)
5279 .kr(2)
5280 .sr(1)
5281 .m(3)
5282 .n(4)
5283 .k(k)
5284 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5285 }
5286 }
5287
5288 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_lt_8_subtile) {
5289 TEST_REQUIRES_X86_SSE41;
5290 for (size_t k = 1; k < 8; k++) {
5291 for (uint32_t m = 1; m <= 3; m++) {
5292 for (uint32_t n = 1; n <= 4; n++) {
5293 GemmMicrokernelTester()
5294 .mr(3)
5295 .nr(4)
5296 .kr(2)
5297 .sr(1)
5298 .m(m)
5299 .n(n)
5300 .k(k)
5301 .iterations(1)
5302 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5303 }
5304 }
5305 }
5306 }
5307
5308 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_gt_8) {
5309 TEST_REQUIRES_X86_SSE41;
5310 for (size_t k = 9; k < 16; k++) {
5311 GemmMicrokernelTester()
5312 .mr(3)
5313 .nr(4)
5314 .kr(2)
5315 .sr(1)
5316 .m(3)
5317 .n(4)
5318 .k(k)
5319 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5320 }
5321 }
5322
5323 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_gt_8_subtile) {
5324 TEST_REQUIRES_X86_SSE41;
5325 for (size_t k = 9; k < 16; k++) {
5326 for (uint32_t m = 1; m <= 3; m++) {
5327 for (uint32_t n = 1; n <= 4; n++) {
5328 GemmMicrokernelTester()
5329 .mr(3)
5330 .nr(4)
5331 .kr(2)
5332 .sr(1)
5333 .m(m)
5334 .n(n)
5335 .k(k)
5336 .iterations(1)
5337 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5338 }
5339 }
5340 }
5341 }
5342
5343 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_div_8) {
5344 TEST_REQUIRES_X86_SSE41;
5345 for (size_t k = 16; k <= 80; k += 8) {
5346 GemmMicrokernelTester()
5347 .mr(3)
5348 .nr(4)
5349 .kr(2)
5350 .sr(1)
5351 .m(3)
5352 .n(4)
5353 .k(k)
5354 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5355 }
5356 }
5357
5358 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_div_8_subtile) {
5359 TEST_REQUIRES_X86_SSE41;
5360 for (size_t k = 16; k <= 80; k += 8) {
5361 for (uint32_t m = 1; m <= 3; m++) {
5362 for (uint32_t n = 1; n <= 4; n++) {
5363 GemmMicrokernelTester()
5364 .mr(3)
5365 .nr(4)
5366 .kr(2)
5367 .sr(1)
5368 .m(m)
5369 .n(n)
5370 .k(k)
5371 .iterations(1)
5372 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5373 }
5374 }
5375 }
5376 }
5377
5378 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4) {
5379 TEST_REQUIRES_X86_SSE41;
5380 for (uint32_t n = 5; n < 8; n++) {
5381 for (size_t k = 1; k <= 40; k += 9) {
5382 GemmMicrokernelTester()
5383 .mr(3)
5384 .nr(4)
5385 .kr(2)
5386 .sr(1)
5387 .m(3)
5388 .n(4)
5389 .k(k)
5390 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5391 }
5392 }
5393 }
5394
5395 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_strided_cn) {
5396 TEST_REQUIRES_X86_SSE41;
5397 for (uint32_t n = 5; n < 8; n++) {
5398 for (size_t k = 1; k <= 40; k += 9) {
5399 GemmMicrokernelTester()
5400 .mr(3)
5401 .nr(4)
5402 .kr(2)
5403 .sr(1)
5404 .m(3)
5405 .n(4)
5406 .k(k)
5407 .cn_stride(7)
5408 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5409 }
5410 }
5411 }
5412
5413 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_subtile) {
5414 TEST_REQUIRES_X86_SSE41;
5415 for (uint32_t n = 5; n < 8; n++) {
5416 for (size_t k = 1; k <= 40; k += 9) {
5417 for (uint32_t m = 1; m <= 3; m++) {
5418 GemmMicrokernelTester()
5419 .mr(3)
5420 .nr(4)
5421 .kr(2)
5422 .sr(1)
5423 .m(m)
5424 .n(n)
5425 .k(k)
5426 .iterations(1)
5427 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5428 }
5429 }
5430 }
5431 }
5432
5433 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4) {
5434 TEST_REQUIRES_X86_SSE41;
5435 for (uint32_t n = 8; n <= 12; n += 4) {
5436 for (size_t k = 1; k <= 40; k += 9) {
5437 GemmMicrokernelTester()
5438 .mr(3)
5439 .nr(4)
5440 .kr(2)
5441 .sr(1)
5442 .m(3)
5443 .n(4)
5444 .k(k)
5445 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5446 }
5447 }
5448 }
5449
5450 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_strided_cn) {
5451 TEST_REQUIRES_X86_SSE41;
5452 for (uint32_t n = 8; n <= 12; n += 4) {
5453 for (size_t k = 1; k <= 40; k += 9) {
5454 GemmMicrokernelTester()
5455 .mr(3)
5456 .nr(4)
5457 .kr(2)
5458 .sr(1)
5459 .m(3)
5460 .n(n)
5461 .k(k)
5462 .cn_stride(7)
5463 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5464 }
5465 }
5466 }
5467
5468 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_subtile) {
5469 TEST_REQUIRES_X86_SSE41;
5470 for (uint32_t n = 8; n <= 12; n += 4) {
5471 for (size_t k = 1; k <= 40; k += 9) {
5472 for (uint32_t m = 1; m <= 3; m++) {
5473 GemmMicrokernelTester()
5474 .mr(3)
5475 .nr(4)
5476 .kr(2)
5477 .sr(1)
5478 .m(m)
5479 .n(n)
5480 .k(k)
5481 .iterations(1)
5482 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5483 }
5484 }
5485 }
5486 }
5487
5488 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, small_kernel) {
5489 TEST_REQUIRES_X86_SSE41;
5490 for (size_t k = 1; k <= 40; k += 9) {
5491 GemmMicrokernelTester()
5492 .mr(3)
5493 .nr(4)
5494 .kr(2)
5495 .sr(1)
5496 .m(3)
5497 .n(4)
5498 .k(k)
5499 .ks(3)
5500 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5501 }
5502 }
5503
5504 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, small_kernel_subtile) {
5505 TEST_REQUIRES_X86_SSE41;
5506 for (size_t k = 1; k <= 40; k += 9) {
5507 for (uint32_t m = 1; m <= 3; m++) {
5508 for (uint32_t n = 1; n <= 4; n++) {
5509 GemmMicrokernelTester()
5510 .mr(3)
5511 .nr(4)
5512 .kr(2)
5513 .sr(1)
5514 .m(m)
5515 .n(n)
5516 .k(k)
5517 .ks(3)
5518 .iterations(1)
5519 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5520 }
5521 }
5522 }
5523 }
5524
5525 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_small_kernel) {
5526 TEST_REQUIRES_X86_SSE41;
5527 for (uint32_t n = 5; n < 8; n++) {
5528 for (size_t k = 1; k <= 40; k += 9) {
5529 GemmMicrokernelTester()
5530 .mr(3)
5531 .nr(4)
5532 .kr(2)
5533 .sr(1)
5534 .m(3)
5535 .n(4)
5536 .k(k)
5537 .ks(3)
5538 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5539 }
5540 }
5541 }
5542
5543 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_small_kernel) {
5544 TEST_REQUIRES_X86_SSE41;
5545 for (uint32_t n = 8; n <= 12; n += 4) {
5546 for (size_t k = 1; k <= 40; k += 9) {
5547 GemmMicrokernelTester()
5548 .mr(3)
5549 .nr(4)
5550 .kr(2)
5551 .sr(1)
5552 .m(3)
5553 .n(4)
5554 .k(k)
5555 .ks(3)
5556 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5557 }
5558 }
5559 }
5560
5561 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cm_subtile) {
5562 TEST_REQUIRES_X86_SSE41;
5563 for (size_t k = 1; k <= 40; k += 9) {
5564 for (uint32_t m = 1; m <= 3; m++) {
5565 for (uint32_t n = 1; n <= 4; n++) {
5566 GemmMicrokernelTester()
5567 .mr(3)
5568 .nr(4)
5569 .kr(2)
5570 .sr(1)
5571 .m(m)
5572 .n(n)
5573 .k(k)
5574 .cm_stride(7)
5575 .iterations(1)
5576 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5577 }
5578 }
5579 }
5580 }
5581
5582 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, a_offset) {
5583 TEST_REQUIRES_X86_SSE41;
5584 for (size_t k = 1; k <= 40; k += 9) {
5585 GemmMicrokernelTester()
5586 .mr(3)
5587 .nr(4)
5588 .kr(2)
5589 .sr(1)
5590 .m(3)
5591 .n(4)
5592 .k(k)
5593 .ks(3)
5594 .a_offset(127)
5595 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5596 }
5597 }
5598
5599 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, zero) {
5600 TEST_REQUIRES_X86_SSE41;
5601 for (uint32_t mz = 0; mz < 3; mz++) {
5602 for (size_t k = 1; k <= 40; k += 9) {
5603 GemmMicrokernelTester()
5604 .mr(3)
5605 .nr(4)
5606 .kr(2)
5607 .sr(1)
5608 .m(3)
5609 .n(4)
5610 .k(k)
5611 .ks(3)
5612 .a_offset(127)
5613 .zero_index(mz)
5614 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5615 }
5616 }
5617 }
5618
5619 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, qmin) {
5620 TEST_REQUIRES_X86_SSE41;
5621 GemmMicrokernelTester()
5622 .mr(3)
5623 .nr(4)
5624 .kr(2)
5625 .sr(1)
5626 .m(3)
5627 .n(4)
5628 .k(8)
5629 .qmin(128)
5630 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5631 }
5632
5633 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, qmax) {
5634 TEST_REQUIRES_X86_SSE41;
5635 GemmMicrokernelTester()
5636 .mr(3)
5637 .nr(4)
5638 .kr(2)
5639 .sr(1)
5640 .m(3)
5641 .n(4)
5642 .k(8)
5643 .qmax(128)
5644 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5645 }
5646
5647 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cm) {
5648 TEST_REQUIRES_X86_SSE41;
5649 GemmMicrokernelTester()
5650 .mr(3)
5651 .nr(4)
5652 .kr(2)
5653 .sr(1)
5654 .m(3)
5655 .n(4)
5656 .k(8)
5657 .cm_stride(7)
5658 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5659 }
5660
5661 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, no_a_zero_point) {
5662 TEST_REQUIRES_X86_SSE41;
5663 for (size_t k = 1; k <= 40; k += 9) {
5664 GemmMicrokernelTester()
5665 .mr(3)
5666 .nr(4)
5667 .kr(2)
5668 .sr(1)
5669 .m(3)
5670 .n(4)
5671 .k(k)
5672 .a_zero_point(0)
5673 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5674 }
5675 }
5676
5677 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, no_b_zero_point) {
5678 TEST_REQUIRES_X86_SSE41;
5679 for (size_t k = 1; k <= 40; k += 9) {
5680 GemmMicrokernelTester()
5681 .mr(3)
5682 .nr(4)
5683 .kr(2)
5684 .sr(1)
5685 .m(3)
5686 .n(4)
5687 .k(k)
5688 .b_zero_point(0)
5689 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5690 }
5691 }
5692
5693 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, no_zero_point) {
5694 TEST_REQUIRES_X86_SSE41;
5695 for (size_t k = 1; k <= 40; k += 9) {
5696 GemmMicrokernelTester()
5697 .mr(3)
5698 .nr(4)
5699 .kr(2)
5700 .sr(1)
5701 .m(3)
5702 .n(4)
5703 .k(k)
5704 .a_zero_point(0)
5705 .b_zero_point(0)
5706 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5707 }
5708 }
5709#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5710
5711
5712#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5713 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8) {
5714 TEST_REQUIRES_X86_SSE41;
5715 GemmMicrokernelTester()
5716 .mr(4)
5717 .nr(4)
5718 .kr(2)
5719 .sr(1)
5720 .m(4)
5721 .n(4)
5722 .k(8)
5723 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5724 }
5725
5726 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, strided_cn) {
5727 TEST_REQUIRES_X86_SSE41;
5728 GemmMicrokernelTester()
5729 .mr(4)
5730 .nr(4)
5731 .kr(2)
5732 .sr(1)
5733 .m(4)
5734 .n(4)
5735 .k(8)
5736 .cn_stride(7)
5737 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5738 }
5739
5740 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_subtile) {
5741 TEST_REQUIRES_X86_SSE41;
5742 for (uint32_t m = 1; m <= 4; m++) {
5743 for (uint32_t n = 1; n <= 4; n++) {
5744 GemmMicrokernelTester()
5745 .mr(4)
5746 .nr(4)
5747 .kr(2)
5748 .sr(1)
5749 .m(m)
5750 .n(n)
5751 .k(8)
5752 .iterations(1)
5753 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5754 }
5755 }
5756 }
5757
5758 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_subtile_m) {
5759 TEST_REQUIRES_X86_SSE41;
5760 for (uint32_t m = 1; m <= 4; m++) {
5761 GemmMicrokernelTester()
5762 .mr(4)
5763 .nr(4)
5764 .kr(2)
5765 .sr(1)
5766 .m(m)
5767 .n(4)
5768 .k(8)
5769 .iterations(1)
5770 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5771 }
5772 }
5773
5774 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_subtile_n) {
5775 TEST_REQUIRES_X86_SSE41;
5776 for (uint32_t n = 1; n <= 4; n++) {
5777 GemmMicrokernelTester()
5778 .mr(4)
5779 .nr(4)
5780 .kr(2)
5781 .sr(1)
5782 .m(4)
5783 .n(n)
5784 .k(8)
5785 .iterations(1)
5786 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5787 }
5788 }
5789
5790 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_lt_8) {
5791 TEST_REQUIRES_X86_SSE41;
5792 for (size_t k = 1; k < 8; k++) {
5793 GemmMicrokernelTester()
5794 .mr(4)
5795 .nr(4)
5796 .kr(2)
5797 .sr(1)
5798 .m(4)
5799 .n(4)
5800 .k(k)
5801 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5802 }
5803 }
5804
5805 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_lt_8_subtile) {
5806 TEST_REQUIRES_X86_SSE41;
5807 for (size_t k = 1; k < 8; k++) {
5808 for (uint32_t m = 1; m <= 4; m++) {
5809 for (uint32_t n = 1; n <= 4; n++) {
5810 GemmMicrokernelTester()
5811 .mr(4)
5812 .nr(4)
5813 .kr(2)
5814 .sr(1)
5815 .m(m)
5816 .n(n)
5817 .k(k)
5818 .iterations(1)
5819 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5820 }
5821 }
5822 }
5823 }
5824
5825 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_gt_8) {
5826 TEST_REQUIRES_X86_SSE41;
5827 for (size_t k = 9; k < 16; k++) {
5828 GemmMicrokernelTester()
5829 .mr(4)
5830 .nr(4)
5831 .kr(2)
5832 .sr(1)
5833 .m(4)
5834 .n(4)
5835 .k(k)
5836 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5837 }
5838 }
5839
5840 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_gt_8_subtile) {
5841 TEST_REQUIRES_X86_SSE41;
5842 for (size_t k = 9; k < 16; k++) {
5843 for (uint32_t m = 1; m <= 4; m++) {
5844 for (uint32_t n = 1; n <= 4; n++) {
5845 GemmMicrokernelTester()
5846 .mr(4)
5847 .nr(4)
5848 .kr(2)
5849 .sr(1)
5850 .m(m)
5851 .n(n)
5852 .k(k)
5853 .iterations(1)
5854 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5855 }
5856 }
5857 }
5858 }
5859
5860 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_div_8) {
5861 TEST_REQUIRES_X86_SSE41;
5862 for (size_t k = 16; k <= 80; k += 8) {
5863 GemmMicrokernelTester()
5864 .mr(4)
5865 .nr(4)
5866 .kr(2)
5867 .sr(1)
5868 .m(4)
5869 .n(4)
5870 .k(k)
5871 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5872 }
5873 }
5874
5875 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_div_8_subtile) {
5876 TEST_REQUIRES_X86_SSE41;
5877 for (size_t k = 16; k <= 80; k += 8) {
5878 for (uint32_t m = 1; m <= 4; m++) {
5879 for (uint32_t n = 1; n <= 4; n++) {
5880 GemmMicrokernelTester()
5881 .mr(4)
5882 .nr(4)
5883 .kr(2)
5884 .sr(1)
5885 .m(m)
5886 .n(n)
5887 .k(k)
5888 .iterations(1)
5889 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5890 }
5891 }
5892 }
5893 }
5894
5895 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4) {
5896 TEST_REQUIRES_X86_SSE41;
5897 for (uint32_t n = 5; n < 8; n++) {
5898 for (size_t k = 1; k <= 40; k += 9) {
5899 GemmMicrokernelTester()
5900 .mr(4)
5901 .nr(4)
5902 .kr(2)
5903 .sr(1)
5904 .m(4)
5905 .n(4)
5906 .k(k)
5907 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5908 }
5909 }
5910 }
5911
5912 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4_strided_cn) {
5913 TEST_REQUIRES_X86_SSE41;
5914 for (uint32_t n = 5; n < 8; n++) {
5915 for (size_t k = 1; k <= 40; k += 9) {
5916 GemmMicrokernelTester()
5917 .mr(4)
5918 .nr(4)
5919 .kr(2)
5920 .sr(1)
5921 .m(4)
5922 .n(4)
5923 .k(k)
5924 .cn_stride(7)
5925 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5926 }
5927 }
5928 }
5929
5930 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4_subtile) {
5931 TEST_REQUIRES_X86_SSE41;
5932 for (uint32_t n = 5; n < 8; n++) {
5933 for (size_t k = 1; k <= 40; k += 9) {
5934 for (uint32_t m = 1; m <= 4; m++) {
5935 GemmMicrokernelTester()
5936 .mr(4)
5937 .nr(4)
5938 .kr(2)
5939 .sr(1)
5940 .m(m)
5941 .n(n)
5942 .k(k)
5943 .iterations(1)
5944 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5945 }
5946 }
5947 }
5948 }
5949
5950 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4) {
5951 TEST_REQUIRES_X86_SSE41;
5952 for (uint32_t n = 8; n <= 12; n += 4) {
5953 for (size_t k = 1; k <= 40; k += 9) {
5954 GemmMicrokernelTester()
5955 .mr(4)
5956 .nr(4)
5957 .kr(2)
5958 .sr(1)
5959 .m(4)
5960 .n(4)
5961 .k(k)
5962 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5963 }
5964 }
5965 }
5966
5967 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4_strided_cn) {
5968 TEST_REQUIRES_X86_SSE41;
5969 for (uint32_t n = 8; n <= 12; n += 4) {
5970 for (size_t k = 1; k <= 40; k += 9) {
5971 GemmMicrokernelTester()
5972 .mr(4)
5973 .nr(4)
5974 .kr(2)
5975 .sr(1)
5976 .m(4)
5977 .n(n)
5978 .k(k)
5979 .cn_stride(7)
5980 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
5981 }
5982 }
5983 }
5984
5985 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4_subtile) {
5986 TEST_REQUIRES_X86_SSE41;
5987 for (uint32_t n = 8; n <= 12; n += 4) {
5988 for (size_t k = 1; k <= 40; k += 9) {
5989 for (uint32_t m = 1; m <= 4; m++) {
5990 GemmMicrokernelTester()
5991 .mr(4)
5992 .nr(4)
5993 .kr(2)
5994 .sr(1)
5995 .m(m)
5996 .n(n)
5997 .k(k)
5998 .iterations(1)
5999 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6000 }
6001 }
6002 }
6003 }
6004
6005 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, small_kernel) {
6006 TEST_REQUIRES_X86_SSE41;
6007 for (size_t k = 1; k <= 40; k += 9) {
6008 GemmMicrokernelTester()
6009 .mr(4)
6010 .nr(4)
6011 .kr(2)
6012 .sr(1)
6013 .m(4)
6014 .n(4)
6015 .k(k)
6016 .ks(3)
6017 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6018 }
6019 }
6020
6021 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, small_kernel_subtile) {
6022 TEST_REQUIRES_X86_SSE41;
6023 for (size_t k = 1; k <= 40; k += 9) {
6024 for (uint32_t m = 1; m <= 4; m++) {
6025 for (uint32_t n = 1; n <= 4; n++) {
6026 GemmMicrokernelTester()
6027 .mr(4)
6028 .nr(4)
6029 .kr(2)
6030 .sr(1)
6031 .m(m)
6032 .n(n)
6033 .k(k)
6034 .ks(3)
6035 .iterations(1)
6036 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6037 }
6038 }
6039 }
6040 }
6041
6042 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4_small_kernel) {
6043 TEST_REQUIRES_X86_SSE41;
6044 for (uint32_t n = 5; n < 8; n++) {
6045 for (size_t k = 1; k <= 40; k += 9) {
6046 GemmMicrokernelTester()
6047 .mr(4)
6048 .nr(4)
6049 .kr(2)
6050 .sr(1)
6051 .m(4)
6052 .n(4)
6053 .k(k)
6054 .ks(3)
6055 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6056 }
6057 }
6058 }
6059
6060 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4_small_kernel) {
6061 TEST_REQUIRES_X86_SSE41;
6062 for (uint32_t n = 8; n <= 12; n += 4) {
6063 for (size_t k = 1; k <= 40; k += 9) {
6064 GemmMicrokernelTester()
6065 .mr(4)
6066 .nr(4)
6067 .kr(2)
6068 .sr(1)
6069 .m(4)
6070 .n(4)
6071 .k(k)
6072 .ks(3)
6073 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6074 }
6075 }
6076 }
6077
6078 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, strided_cm_subtile) {
6079 TEST_REQUIRES_X86_SSE41;
6080 for (size_t k = 1; k <= 40; k += 9) {
6081 for (uint32_t m = 1; m <= 4; m++) {
6082 for (uint32_t n = 1; n <= 4; n++) {
6083 GemmMicrokernelTester()
6084 .mr(4)
6085 .nr(4)
6086 .kr(2)
6087 .sr(1)
6088 .m(m)
6089 .n(n)
6090 .k(k)
6091 .cm_stride(7)
6092 .iterations(1)
6093 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6094 }
6095 }
6096 }
6097 }
6098
6099 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, a_offset) {
6100 TEST_REQUIRES_X86_SSE41;
6101 for (size_t k = 1; k <= 40; k += 9) {
6102 GemmMicrokernelTester()
6103 .mr(4)
6104 .nr(4)
6105 .kr(2)
6106 .sr(1)
6107 .m(4)
6108 .n(4)
6109 .k(k)
6110 .ks(3)
6111 .a_offset(163)
6112 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6113 }
6114 }
6115
6116 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, zero) {
6117 TEST_REQUIRES_X86_SSE41;
6118 for (uint32_t mz = 0; mz < 4; mz++) {
6119 for (size_t k = 1; k <= 40; k += 9) {
6120 GemmMicrokernelTester()
6121 .mr(4)
6122 .nr(4)
6123 .kr(2)
6124 .sr(1)
6125 .m(4)
6126 .n(4)
6127 .k(k)
6128 .ks(3)
6129 .a_offset(163)
6130 .zero_index(mz)
6131 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6132 }
6133 }
6134 }
6135
6136 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, qmin) {
6137 TEST_REQUIRES_X86_SSE41;
6138 GemmMicrokernelTester()
6139 .mr(4)
6140 .nr(4)
6141 .kr(2)
6142 .sr(1)
6143 .m(4)
6144 .n(4)
6145 .k(8)
6146 .qmin(128)
6147 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6148 }
6149
6150 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, qmax) {
6151 TEST_REQUIRES_X86_SSE41;
6152 GemmMicrokernelTester()
6153 .mr(4)
6154 .nr(4)
6155 .kr(2)
6156 .sr(1)
6157 .m(4)
6158 .n(4)
6159 .k(8)
6160 .qmax(128)
6161 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6162 }
6163
6164 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, strided_cm) {
6165 TEST_REQUIRES_X86_SSE41;
6166 GemmMicrokernelTester()
6167 .mr(4)
6168 .nr(4)
6169 .kr(2)
6170 .sr(1)
6171 .m(4)
6172 .n(4)
6173 .k(8)
6174 .cm_stride(7)
6175 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6176 }
6177
6178 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, no_a_zero_point) {
6179 TEST_REQUIRES_X86_SSE41;
6180 for (size_t k = 1; k <= 40; k += 9) {
6181 GemmMicrokernelTester()
6182 .mr(4)
6183 .nr(4)
6184 .kr(2)
6185 .sr(1)
6186 .m(4)
6187 .n(4)
6188 .k(k)
6189 .a_zero_point(0)
6190 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6191 }
6192 }
6193
6194 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, no_b_zero_point) {
6195 TEST_REQUIRES_X86_SSE41;
6196 for (size_t k = 1; k <= 40; k += 9) {
6197 GemmMicrokernelTester()
6198 .mr(4)
6199 .nr(4)
6200 .kr(2)
6201 .sr(1)
6202 .m(4)
6203 .n(4)
6204 .k(k)
6205 .b_zero_point(0)
6206 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6207 }
6208 }
6209
6210 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, no_zero_point) {
6211 TEST_REQUIRES_X86_SSE41;
6212 for (size_t k = 1; k <= 40; k += 9) {
6213 GemmMicrokernelTester()
6214 .mr(4)
6215 .nr(4)
6216 .kr(2)
6217 .sr(1)
6218 .m(4)
6219 .n(4)
6220 .k(k)
6221 .a_zero_point(0)
6222 .b_zero_point(0)
6223 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6224 }
6225 }
6226#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6227
6228
6229#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6230 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8) {
6231 TEST_REQUIRES_X86_AVX;
6232 GemmMicrokernelTester()
6233 .mr(1)
6234 .nr(4)
6235 .kr(2)
6236 .sr(1)
6237 .m(1)
6238 .n(4)
6239 .k(8)
6240 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6241 }
6242
6243 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, strided_cn) {
6244 TEST_REQUIRES_X86_AVX;
6245 GemmMicrokernelTester()
6246 .mr(1)
6247 .nr(4)
6248 .kr(2)
6249 .sr(1)
6250 .m(1)
6251 .n(4)
6252 .k(8)
6253 .cn_stride(7)
6254 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6255 }
6256
6257 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8_subtile) {
6258 TEST_REQUIRES_X86_AVX;
6259 for (uint32_t m = 1; m <= 1; m++) {
6260 for (uint32_t n = 1; n <= 4; n++) {
6261 GemmMicrokernelTester()
6262 .mr(1)
6263 .nr(4)
6264 .kr(2)
6265 .sr(1)
6266 .m(m)
6267 .n(n)
6268 .k(8)
6269 .iterations(1)
6270 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6271 }
6272 }
6273 }
6274
6275 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8_subtile_m) {
6276 TEST_REQUIRES_X86_AVX;
6277 for (uint32_t m = 1; m <= 1; m++) {
6278 GemmMicrokernelTester()
6279 .mr(1)
6280 .nr(4)
6281 .kr(2)
6282 .sr(1)
6283 .m(m)
6284 .n(4)
6285 .k(8)
6286 .iterations(1)
6287 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6288 }
6289 }
6290
6291 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8_subtile_n) {
6292 TEST_REQUIRES_X86_AVX;
6293 for (uint32_t n = 1; n <= 4; n++) {
6294 GemmMicrokernelTester()
6295 .mr(1)
6296 .nr(4)
6297 .kr(2)
6298 .sr(1)
6299 .m(1)
6300 .n(n)
6301 .k(8)
6302 .iterations(1)
6303 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6304 }
6305 }
6306
6307 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_lt_8) {
6308 TEST_REQUIRES_X86_AVX;
6309 for (size_t k = 1; k < 8; k++) {
6310 GemmMicrokernelTester()
6311 .mr(1)
6312 .nr(4)
6313 .kr(2)
6314 .sr(1)
6315 .m(1)
6316 .n(4)
6317 .k(k)
6318 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6319 }
6320 }
6321
6322 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_lt_8_subtile) {
6323 TEST_REQUIRES_X86_AVX;
6324 for (size_t k = 1; k < 8; k++) {
6325 for (uint32_t m = 1; m <= 1; m++) {
6326 for (uint32_t n = 1; n <= 4; n++) {
6327 GemmMicrokernelTester()
6328 .mr(1)
6329 .nr(4)
6330 .kr(2)
6331 .sr(1)
6332 .m(m)
6333 .n(n)
6334 .k(k)
6335 .iterations(1)
6336 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6337 }
6338 }
6339 }
6340 }
6341
6342 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_gt_8) {
6343 TEST_REQUIRES_X86_AVX;
6344 for (size_t k = 9; k < 16; k++) {
6345 GemmMicrokernelTester()
6346 .mr(1)
6347 .nr(4)
6348 .kr(2)
6349 .sr(1)
6350 .m(1)
6351 .n(4)
6352 .k(k)
6353 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6354 }
6355 }
6356
6357 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_gt_8_subtile) {
6358 TEST_REQUIRES_X86_AVX;
6359 for (size_t k = 9; k < 16; k++) {
6360 for (uint32_t m = 1; m <= 1; m++) {
6361 for (uint32_t n = 1; n <= 4; n++) {
6362 GemmMicrokernelTester()
6363 .mr(1)
6364 .nr(4)
6365 .kr(2)
6366 .sr(1)
6367 .m(m)
6368 .n(n)
6369 .k(k)
6370 .iterations(1)
6371 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6372 }
6373 }
6374 }
6375 }
6376
6377 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_div_8) {
6378 TEST_REQUIRES_X86_AVX;
6379 for (size_t k = 16; k <= 80; k += 8) {
6380 GemmMicrokernelTester()
6381 .mr(1)
6382 .nr(4)
6383 .kr(2)
6384 .sr(1)
6385 .m(1)
6386 .n(4)
6387 .k(k)
6388 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6389 }
6390 }
6391
6392 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_div_8_subtile) {
6393 TEST_REQUIRES_X86_AVX;
6394 for (size_t k = 16; k <= 80; k += 8) {
6395 for (uint32_t m = 1; m <= 1; m++) {
6396 for (uint32_t n = 1; n <= 4; n++) {
6397 GemmMicrokernelTester()
6398 .mr(1)
6399 .nr(4)
6400 .kr(2)
6401 .sr(1)
6402 .m(m)
6403 .n(n)
6404 .k(k)
6405 .iterations(1)
6406 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6407 }
6408 }
6409 }
6410 }
6411
6412 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4) {
6413 TEST_REQUIRES_X86_AVX;
6414 for (uint32_t n = 5; n < 8; n++) {
6415 for (size_t k = 1; k <= 40; k += 9) {
6416 GemmMicrokernelTester()
6417 .mr(1)
6418 .nr(4)
6419 .kr(2)
6420 .sr(1)
6421 .m(1)
6422 .n(4)
6423 .k(k)
6424 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6425 }
6426 }
6427 }
6428
6429 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4_strided_cn) {
6430 TEST_REQUIRES_X86_AVX;
6431 for (uint32_t n = 5; n < 8; n++) {
6432 for (size_t k = 1; k <= 40; k += 9) {
6433 GemmMicrokernelTester()
6434 .mr(1)
6435 .nr(4)
6436 .kr(2)
6437 .sr(1)
6438 .m(1)
6439 .n(4)
6440 .k(k)
6441 .cn_stride(7)
6442 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6443 }
6444 }
6445 }
6446
6447 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4_subtile) {
6448 TEST_REQUIRES_X86_AVX;
6449 for (uint32_t n = 5; n < 8; n++) {
6450 for (size_t k = 1; k <= 40; k += 9) {
6451 for (uint32_t m = 1; m <= 1; m++) {
6452 GemmMicrokernelTester()
6453 .mr(1)
6454 .nr(4)
6455 .kr(2)
6456 .sr(1)
6457 .m(m)
6458 .n(n)
6459 .k(k)
6460 .iterations(1)
6461 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6462 }
6463 }
6464 }
6465 }
6466
6467 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4) {
6468 TEST_REQUIRES_X86_AVX;
6469 for (uint32_t n = 8; n <= 12; n += 4) {
6470 for (size_t k = 1; k <= 40; k += 9) {
6471 GemmMicrokernelTester()
6472 .mr(1)
6473 .nr(4)
6474 .kr(2)
6475 .sr(1)
6476 .m(1)
6477 .n(4)
6478 .k(k)
6479 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6480 }
6481 }
6482 }
6483
6484 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4_strided_cn) {
6485 TEST_REQUIRES_X86_AVX;
6486 for (uint32_t n = 8; n <= 12; n += 4) {
6487 for (size_t k = 1; k <= 40; k += 9) {
6488 GemmMicrokernelTester()
6489 .mr(1)
6490 .nr(4)
6491 .kr(2)
6492 .sr(1)
6493 .m(1)
6494 .n(n)
6495 .k(k)
6496 .cn_stride(7)
6497 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6498 }
6499 }
6500 }
6501
6502 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4_subtile) {
6503 TEST_REQUIRES_X86_AVX;
6504 for (uint32_t n = 8; n <= 12; n += 4) {
6505 for (size_t k = 1; k <= 40; k += 9) {
6506 for (uint32_t m = 1; m <= 1; m++) {
6507 GemmMicrokernelTester()
6508 .mr(1)
6509 .nr(4)
6510 .kr(2)
6511 .sr(1)
6512 .m(m)
6513 .n(n)
6514 .k(k)
6515 .iterations(1)
6516 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6517 }
6518 }
6519 }
6520 }
6521
6522 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, small_kernel) {
6523 TEST_REQUIRES_X86_AVX;
6524 for (size_t k = 1; k <= 40; k += 9) {
6525 GemmMicrokernelTester()
6526 .mr(1)
6527 .nr(4)
6528 .kr(2)
6529 .sr(1)
6530 .m(1)
6531 .n(4)
6532 .k(k)
6533 .ks(3)
6534 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6535 }
6536 }
6537
6538 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, small_kernel_subtile) {
6539 TEST_REQUIRES_X86_AVX;
6540 for (size_t k = 1; k <= 40; k += 9) {
6541 for (uint32_t m = 1; m <= 1; m++) {
6542 for (uint32_t n = 1; n <= 4; n++) {
6543 GemmMicrokernelTester()
6544 .mr(1)
6545 .nr(4)
6546 .kr(2)
6547 .sr(1)
6548 .m(m)
6549 .n(n)
6550 .k(k)
6551 .ks(3)
6552 .iterations(1)
6553 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6554 }
6555 }
6556 }
6557 }
6558
6559 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4_small_kernel) {
6560 TEST_REQUIRES_X86_AVX;
6561 for (uint32_t n = 5; n < 8; n++) {
6562 for (size_t k = 1; k <= 40; k += 9) {
6563 GemmMicrokernelTester()
6564 .mr(1)
6565 .nr(4)
6566 .kr(2)
6567 .sr(1)
6568 .m(1)
6569 .n(4)
6570 .k(k)
6571 .ks(3)
6572 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6573 }
6574 }
6575 }
6576
6577 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4_small_kernel) {
6578 TEST_REQUIRES_X86_AVX;
6579 for (uint32_t n = 8; n <= 12; n += 4) {
6580 for (size_t k = 1; k <= 40; k += 9) {
6581 GemmMicrokernelTester()
6582 .mr(1)
6583 .nr(4)
6584 .kr(2)
6585 .sr(1)
6586 .m(1)
6587 .n(4)
6588 .k(k)
6589 .ks(3)
6590 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6591 }
6592 }
6593 }
6594
6595 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, strided_cm_subtile) {
6596 TEST_REQUIRES_X86_AVX;
6597 for (size_t k = 1; k <= 40; k += 9) {
6598 for (uint32_t m = 1; m <= 1; m++) {
6599 for (uint32_t n = 1; n <= 4; n++) {
6600 GemmMicrokernelTester()
6601 .mr(1)
6602 .nr(4)
6603 .kr(2)
6604 .sr(1)
6605 .m(m)
6606 .n(n)
6607 .k(k)
6608 .cm_stride(7)
6609 .iterations(1)
6610 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6611 }
6612 }
6613 }
6614 }
6615
6616 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, a_offset) {
6617 TEST_REQUIRES_X86_AVX;
6618 for (size_t k = 1; k <= 40; k += 9) {
6619 GemmMicrokernelTester()
6620 .mr(1)
6621 .nr(4)
6622 .kr(2)
6623 .sr(1)
6624 .m(1)
6625 .n(4)
6626 .k(k)
6627 .ks(3)
6628 .a_offset(43)
6629 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6630 }
6631 }
6632
6633 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, zero) {
6634 TEST_REQUIRES_X86_AVX;
6635 for (uint32_t mz = 0; mz < 1; mz++) {
6636 for (size_t k = 1; k <= 40; k += 9) {
6637 GemmMicrokernelTester()
6638 .mr(1)
6639 .nr(4)
6640 .kr(2)
6641 .sr(1)
6642 .m(1)
6643 .n(4)
6644 .k(k)
6645 .ks(3)
6646 .a_offset(43)
6647 .zero_index(mz)
6648 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6649 }
6650 }
6651 }
6652
6653 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, qmin) {
6654 TEST_REQUIRES_X86_AVX;
6655 GemmMicrokernelTester()
6656 .mr(1)
6657 .nr(4)
6658 .kr(2)
6659 .sr(1)
6660 .m(1)
6661 .n(4)
6662 .k(8)
6663 .qmin(128)
6664 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6665 }
6666
6667 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, qmax) {
6668 TEST_REQUIRES_X86_AVX;
6669 GemmMicrokernelTester()
6670 .mr(1)
6671 .nr(4)
6672 .kr(2)
6673 .sr(1)
6674 .m(1)
6675 .n(4)
6676 .k(8)
6677 .qmax(128)
6678 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6679 }
6680
6681 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, strided_cm) {
6682 TEST_REQUIRES_X86_AVX;
6683 GemmMicrokernelTester()
6684 .mr(1)
6685 .nr(4)
6686 .kr(2)
6687 .sr(1)
6688 .m(1)
6689 .n(4)
6690 .k(8)
6691 .cm_stride(7)
6692 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6693 }
6694
6695 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, no_a_zero_point) {
6696 TEST_REQUIRES_X86_AVX;
6697 for (size_t k = 1; k <= 40; k += 9) {
6698 GemmMicrokernelTester()
6699 .mr(1)
6700 .nr(4)
6701 .kr(2)
6702 .sr(1)
6703 .m(1)
6704 .n(4)
6705 .k(k)
6706 .a_zero_point(0)
6707 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6708 }
6709 }
6710
6711 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, no_b_zero_point) {
6712 TEST_REQUIRES_X86_AVX;
6713 for (size_t k = 1; k <= 40; k += 9) {
6714 GemmMicrokernelTester()
6715 .mr(1)
6716 .nr(4)
6717 .kr(2)
6718 .sr(1)
6719 .m(1)
6720 .n(4)
6721 .k(k)
6722 .b_zero_point(0)
6723 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6724 }
6725 }
6726
6727 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, no_zero_point) {
6728 TEST_REQUIRES_X86_AVX;
6729 for (size_t k = 1; k <= 40; k += 9) {
6730 GemmMicrokernelTester()
6731 .mr(1)
6732 .nr(4)
6733 .kr(2)
6734 .sr(1)
6735 .m(1)
6736 .n(4)
6737 .k(k)
6738 .a_zero_point(0)
6739 .b_zero_point(0)
6740 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6741 }
6742 }
6743#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6744
6745
6746#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6747 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8) {
6748 TEST_REQUIRES_X86_AVX;
6749 GemmMicrokernelTester()
6750 .mr(2)
6751 .nr(4)
6752 .kr(2)
6753 .sr(1)
6754 .m(2)
6755 .n(4)
6756 .k(8)
6757 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6758 }
6759
6760 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cn) {
6761 TEST_REQUIRES_X86_AVX;
6762 GemmMicrokernelTester()
6763 .mr(2)
6764 .nr(4)
6765 .kr(2)
6766 .sr(1)
6767 .m(2)
6768 .n(4)
6769 .k(8)
6770 .cn_stride(7)
6771 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6772 }
6773
6774 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile) {
6775 TEST_REQUIRES_X86_AVX;
6776 for (uint32_t m = 1; m <= 2; m++) {
6777 for (uint32_t n = 1; n <= 4; n++) {
6778 GemmMicrokernelTester()
6779 .mr(2)
6780 .nr(4)
6781 .kr(2)
6782 .sr(1)
6783 .m(m)
6784 .n(n)
6785 .k(8)
6786 .iterations(1)
6787 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6788 }
6789 }
6790 }
6791
6792 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile_m) {
6793 TEST_REQUIRES_X86_AVX;
6794 for (uint32_t m = 1; m <= 2; m++) {
6795 GemmMicrokernelTester()
6796 .mr(2)
6797 .nr(4)
6798 .kr(2)
6799 .sr(1)
6800 .m(m)
6801 .n(4)
6802 .k(8)
6803 .iterations(1)
6804 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6805 }
6806 }
6807
6808 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile_n) {
6809 TEST_REQUIRES_X86_AVX;
6810 for (uint32_t n = 1; n <= 4; n++) {
6811 GemmMicrokernelTester()
6812 .mr(2)
6813 .nr(4)
6814 .kr(2)
6815 .sr(1)
6816 .m(2)
6817 .n(n)
6818 .k(8)
6819 .iterations(1)
6820 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6821 }
6822 }
6823
6824 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8) {
6825 TEST_REQUIRES_X86_AVX;
6826 for (size_t k = 1; k < 8; k++) {
6827 GemmMicrokernelTester()
6828 .mr(2)
6829 .nr(4)
6830 .kr(2)
6831 .sr(1)
6832 .m(2)
6833 .n(4)
6834 .k(k)
6835 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6836 }
6837 }
6838
6839 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8_subtile) {
6840 TEST_REQUIRES_X86_AVX;
6841 for (size_t k = 1; k < 8; k++) {
6842 for (uint32_t m = 1; m <= 2; m++) {
6843 for (uint32_t n = 1; n <= 4; n++) {
6844 GemmMicrokernelTester()
6845 .mr(2)
6846 .nr(4)
6847 .kr(2)
6848 .sr(1)
6849 .m(m)
6850 .n(n)
6851 .k(k)
6852 .iterations(1)
6853 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6854 }
6855 }
6856 }
6857 }
6858
6859 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8) {
6860 TEST_REQUIRES_X86_AVX;
6861 for (size_t k = 9; k < 16; k++) {
6862 GemmMicrokernelTester()
6863 .mr(2)
6864 .nr(4)
6865 .kr(2)
6866 .sr(1)
6867 .m(2)
6868 .n(4)
6869 .k(k)
6870 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6871 }
6872 }
6873
6874 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8_subtile) {
6875 TEST_REQUIRES_X86_AVX;
6876 for (size_t k = 9; k < 16; k++) {
6877 for (uint32_t m = 1; m <= 2; m++) {
6878 for (uint32_t n = 1; n <= 4; n++) {
6879 GemmMicrokernelTester()
6880 .mr(2)
6881 .nr(4)
6882 .kr(2)
6883 .sr(1)
6884 .m(m)
6885 .n(n)
6886 .k(k)
6887 .iterations(1)
6888 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6889 }
6890 }
6891 }
6892 }
6893
6894 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8) {
6895 TEST_REQUIRES_X86_AVX;
6896 for (size_t k = 16; k <= 80; k += 8) {
6897 GemmMicrokernelTester()
6898 .mr(2)
6899 .nr(4)
6900 .kr(2)
6901 .sr(1)
6902 .m(2)
6903 .n(4)
6904 .k(k)
6905 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6906 }
6907 }
6908
6909 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8_subtile) {
6910 TEST_REQUIRES_X86_AVX;
6911 for (size_t k = 16; k <= 80; k += 8) {
6912 for (uint32_t m = 1; m <= 2; m++) {
6913 for (uint32_t n = 1; n <= 4; n++) {
6914 GemmMicrokernelTester()
6915 .mr(2)
6916 .nr(4)
6917 .kr(2)
6918 .sr(1)
6919 .m(m)
6920 .n(n)
6921 .k(k)
6922 .iterations(1)
6923 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6924 }
6925 }
6926 }
6927 }
6928
6929 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4) {
6930 TEST_REQUIRES_X86_AVX;
6931 for (uint32_t n = 5; n < 8; n++) {
6932 for (size_t k = 1; k <= 40; k += 9) {
6933 GemmMicrokernelTester()
6934 .mr(2)
6935 .nr(4)
6936 .kr(2)
6937 .sr(1)
6938 .m(2)
6939 .n(4)
6940 .k(k)
6941 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6942 }
6943 }
6944 }
6945
6946 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_strided_cn) {
6947 TEST_REQUIRES_X86_AVX;
6948 for (uint32_t n = 5; n < 8; n++) {
6949 for (size_t k = 1; k <= 40; k += 9) {
6950 GemmMicrokernelTester()
6951 .mr(2)
6952 .nr(4)
6953 .kr(2)
6954 .sr(1)
6955 .m(2)
6956 .n(4)
6957 .k(k)
6958 .cn_stride(7)
6959 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6960 }
6961 }
6962 }
6963
6964 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_subtile) {
6965 TEST_REQUIRES_X86_AVX;
6966 for (uint32_t n = 5; n < 8; n++) {
6967 for (size_t k = 1; k <= 40; k += 9) {
6968 for (uint32_t m = 1; m <= 2; m++) {
6969 GemmMicrokernelTester()
6970 .mr(2)
6971 .nr(4)
6972 .kr(2)
6973 .sr(1)
6974 .m(m)
6975 .n(n)
6976 .k(k)
6977 .iterations(1)
6978 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6979 }
6980 }
6981 }
6982 }
6983
6984 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4) {
6985 TEST_REQUIRES_X86_AVX;
6986 for (uint32_t n = 8; n <= 12; n += 4) {
6987 for (size_t k = 1; k <= 40; k += 9) {
6988 GemmMicrokernelTester()
6989 .mr(2)
6990 .nr(4)
6991 .kr(2)
6992 .sr(1)
6993 .m(2)
6994 .n(4)
6995 .k(k)
6996 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
6997 }
6998 }
6999 }
7000
7001 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_strided_cn) {
7002 TEST_REQUIRES_X86_AVX;
7003 for (uint32_t n = 8; n <= 12; n += 4) {
7004 for (size_t k = 1; k <= 40; k += 9) {
7005 GemmMicrokernelTester()
7006 .mr(2)
7007 .nr(4)
7008 .kr(2)
7009 .sr(1)
7010 .m(2)
7011 .n(n)
7012 .k(k)
7013 .cn_stride(7)
7014 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7015 }
7016 }
7017 }
7018
7019 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_subtile) {
7020 TEST_REQUIRES_X86_AVX;
7021 for (uint32_t n = 8; n <= 12; n += 4) {
7022 for (size_t k = 1; k <= 40; k += 9) {
7023 for (uint32_t m = 1; m <= 2; m++) {
7024 GemmMicrokernelTester()
7025 .mr(2)
7026 .nr(4)
7027 .kr(2)
7028 .sr(1)
7029 .m(m)
7030 .n(n)
7031 .k(k)
7032 .iterations(1)
7033 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7034 }
7035 }
7036 }
7037 }
7038
7039 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, small_kernel) {
7040 TEST_REQUIRES_X86_AVX;
7041 for (size_t k = 1; k <= 40; k += 9) {
7042 GemmMicrokernelTester()
7043 .mr(2)
7044 .nr(4)
7045 .kr(2)
7046 .sr(1)
7047 .m(2)
7048 .n(4)
7049 .k(k)
7050 .ks(3)
7051 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7052 }
7053 }
7054
7055 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, small_kernel_subtile) {
7056 TEST_REQUIRES_X86_AVX;
7057 for (size_t k = 1; k <= 40; k += 9) {
7058 for (uint32_t m = 1; m <= 2; m++) {
7059 for (uint32_t n = 1; n <= 4; n++) {
7060 GemmMicrokernelTester()
7061 .mr(2)
7062 .nr(4)
7063 .kr(2)
7064 .sr(1)
7065 .m(m)
7066 .n(n)
7067 .k(k)
7068 .ks(3)
7069 .iterations(1)
7070 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7071 }
7072 }
7073 }
7074 }
7075
7076 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_small_kernel) {
7077 TEST_REQUIRES_X86_AVX;
7078 for (uint32_t n = 5; n < 8; n++) {
7079 for (size_t k = 1; k <= 40; k += 9) {
7080 GemmMicrokernelTester()
7081 .mr(2)
7082 .nr(4)
7083 .kr(2)
7084 .sr(1)
7085 .m(2)
7086 .n(4)
7087 .k(k)
7088 .ks(3)
7089 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7090 }
7091 }
7092 }
7093
7094 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_small_kernel) {
7095 TEST_REQUIRES_X86_AVX;
7096 for (uint32_t n = 8; n <= 12; n += 4) {
7097 for (size_t k = 1; k <= 40; k += 9) {
7098 GemmMicrokernelTester()
7099 .mr(2)
7100 .nr(4)
7101 .kr(2)
7102 .sr(1)
7103 .m(2)
7104 .n(4)
7105 .k(k)
7106 .ks(3)
7107 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7108 }
7109 }
7110 }
7111
7112 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cm_subtile) {
7113 TEST_REQUIRES_X86_AVX;
7114 for (size_t k = 1; k <= 40; k += 9) {
7115 for (uint32_t m = 1; m <= 2; m++) {
7116 for (uint32_t n = 1; n <= 4; n++) {
7117 GemmMicrokernelTester()
7118 .mr(2)
7119 .nr(4)
7120 .kr(2)
7121 .sr(1)
7122 .m(m)
7123 .n(n)
7124 .k(k)
7125 .cm_stride(7)
7126 .iterations(1)
7127 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7128 }
7129 }
7130 }
7131 }
7132
7133 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, a_offset) {
7134 TEST_REQUIRES_X86_AVX;
7135 for (size_t k = 1; k <= 40; k += 9) {
7136 GemmMicrokernelTester()
7137 .mr(2)
7138 .nr(4)
7139 .kr(2)
7140 .sr(1)
7141 .m(2)
7142 .n(4)
7143 .k(k)
7144 .ks(3)
7145 .a_offset(83)
7146 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7147 }
7148 }
7149
7150 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, zero) {
7151 TEST_REQUIRES_X86_AVX;
7152 for (uint32_t mz = 0; mz < 2; mz++) {
7153 for (size_t k = 1; k <= 40; k += 9) {
7154 GemmMicrokernelTester()
7155 .mr(2)
7156 .nr(4)
7157 .kr(2)
7158 .sr(1)
7159 .m(2)
7160 .n(4)
7161 .k(k)
7162 .ks(3)
7163 .a_offset(83)
7164 .zero_index(mz)
7165 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7166 }
7167 }
7168 }
7169
7170 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, qmin) {
7171 TEST_REQUIRES_X86_AVX;
7172 GemmMicrokernelTester()
7173 .mr(2)
7174 .nr(4)
7175 .kr(2)
7176 .sr(1)
7177 .m(2)
7178 .n(4)
7179 .k(8)
7180 .qmin(128)
7181 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7182 }
7183
7184 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, qmax) {
7185 TEST_REQUIRES_X86_AVX;
7186 GemmMicrokernelTester()
7187 .mr(2)
7188 .nr(4)
7189 .kr(2)
7190 .sr(1)
7191 .m(2)
7192 .n(4)
7193 .k(8)
7194 .qmax(128)
7195 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7196 }
7197
7198 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cm) {
7199 TEST_REQUIRES_X86_AVX;
7200 GemmMicrokernelTester()
7201 .mr(2)
7202 .nr(4)
7203 .kr(2)
7204 .sr(1)
7205 .m(2)
7206 .n(4)
7207 .k(8)
7208 .cm_stride(7)
7209 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7210 }
7211
7212 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, no_a_zero_point) {
7213 TEST_REQUIRES_X86_AVX;
7214 for (size_t k = 1; k <= 40; k += 9) {
7215 GemmMicrokernelTester()
7216 .mr(2)
7217 .nr(4)
7218 .kr(2)
7219 .sr(1)
7220 .m(2)
7221 .n(4)
7222 .k(k)
7223 .a_zero_point(0)
7224 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7225 }
7226 }
7227
7228 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, no_b_zero_point) {
7229 TEST_REQUIRES_X86_AVX;
7230 for (size_t k = 1; k <= 40; k += 9) {
7231 GemmMicrokernelTester()
7232 .mr(2)
7233 .nr(4)
7234 .kr(2)
7235 .sr(1)
7236 .m(2)
7237 .n(4)
7238 .k(k)
7239 .b_zero_point(0)
7240 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7241 }
7242 }
7243
7244 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, no_zero_point) {
7245 TEST_REQUIRES_X86_AVX;
7246 for (size_t k = 1; k <= 40; k += 9) {
7247 GemmMicrokernelTester()
7248 .mr(2)
7249 .nr(4)
7250 .kr(2)
7251 .sr(1)
7252 .m(2)
7253 .n(4)
7254 .k(k)
7255 .a_zero_point(0)
7256 .b_zero_point(0)
7257 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7258 }
7259 }
7260#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7261
7262
7263#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7264 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8) {
7265 TEST_REQUIRES_X86_AVX;
7266 GemmMicrokernelTester()
7267 .mr(3)
7268 .nr(4)
7269 .kr(2)
7270 .sr(1)
7271 .m(3)
7272 .n(4)
7273 .k(8)
7274 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7275 }
7276
7277 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cn) {
7278 TEST_REQUIRES_X86_AVX;
7279 GemmMicrokernelTester()
7280 .mr(3)
7281 .nr(4)
7282 .kr(2)
7283 .sr(1)
7284 .m(3)
7285 .n(4)
7286 .k(8)
7287 .cn_stride(7)
7288 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7289 }
7290
7291 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile) {
7292 TEST_REQUIRES_X86_AVX;
7293 for (uint32_t m = 1; m <= 3; m++) {
7294 for (uint32_t n = 1; n <= 4; n++) {
7295 GemmMicrokernelTester()
7296 .mr(3)
7297 .nr(4)
7298 .kr(2)
7299 .sr(1)
7300 .m(m)
7301 .n(n)
7302 .k(8)
7303 .iterations(1)
7304 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7305 }
7306 }
7307 }
7308
7309 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile_m) {
7310 TEST_REQUIRES_X86_AVX;
7311 for (uint32_t m = 1; m <= 3; m++) {
7312 GemmMicrokernelTester()
7313 .mr(3)
7314 .nr(4)
7315 .kr(2)
7316 .sr(1)
7317 .m(m)
7318 .n(4)
7319 .k(8)
7320 .iterations(1)
7321 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7322 }
7323 }
7324
7325 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile_n) {
7326 TEST_REQUIRES_X86_AVX;
7327 for (uint32_t n = 1; n <= 4; n++) {
7328 GemmMicrokernelTester()
7329 .mr(3)
7330 .nr(4)
7331 .kr(2)
7332 .sr(1)
7333 .m(3)
7334 .n(n)
7335 .k(8)
7336 .iterations(1)
7337 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7338 }
7339 }
7340
7341 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8) {
7342 TEST_REQUIRES_X86_AVX;
7343 for (size_t k = 1; k < 8; k++) {
7344 GemmMicrokernelTester()
7345 .mr(3)
7346 .nr(4)
7347 .kr(2)
7348 .sr(1)
7349 .m(3)
7350 .n(4)
7351 .k(k)
7352 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7353 }
7354 }
7355
7356 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8_subtile) {
7357 TEST_REQUIRES_X86_AVX;
7358 for (size_t k = 1; k < 8; k++) {
7359 for (uint32_t m = 1; m <= 3; m++) {
7360 for (uint32_t n = 1; n <= 4; n++) {
7361 GemmMicrokernelTester()
7362 .mr(3)
7363 .nr(4)
7364 .kr(2)
7365 .sr(1)
7366 .m(m)
7367 .n(n)
7368 .k(k)
7369 .iterations(1)
7370 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7371 }
7372 }
7373 }
7374 }
7375
7376 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8) {
7377 TEST_REQUIRES_X86_AVX;
7378 for (size_t k = 9; k < 16; k++) {
7379 GemmMicrokernelTester()
7380 .mr(3)
7381 .nr(4)
7382 .kr(2)
7383 .sr(1)
7384 .m(3)
7385 .n(4)
7386 .k(k)
7387 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7388 }
7389 }
7390
7391 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8_subtile) {
7392 TEST_REQUIRES_X86_AVX;
7393 for (size_t k = 9; k < 16; k++) {
7394 for (uint32_t m = 1; m <= 3; m++) {
7395 for (uint32_t n = 1; n <= 4; n++) {
7396 GemmMicrokernelTester()
7397 .mr(3)
7398 .nr(4)
7399 .kr(2)
7400 .sr(1)
7401 .m(m)
7402 .n(n)
7403 .k(k)
7404 .iterations(1)
7405 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7406 }
7407 }
7408 }
7409 }
7410
7411 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8) {
7412 TEST_REQUIRES_X86_AVX;
7413 for (size_t k = 16; k <= 80; k += 8) {
7414 GemmMicrokernelTester()
7415 .mr(3)
7416 .nr(4)
7417 .kr(2)
7418 .sr(1)
7419 .m(3)
7420 .n(4)
7421 .k(k)
7422 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7423 }
7424 }
7425
7426 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8_subtile) {
7427 TEST_REQUIRES_X86_AVX;
7428 for (size_t k = 16; k <= 80; k += 8) {
7429 for (uint32_t m = 1; m <= 3; m++) {
7430 for (uint32_t n = 1; n <= 4; n++) {
7431 GemmMicrokernelTester()
7432 .mr(3)
7433 .nr(4)
7434 .kr(2)
7435 .sr(1)
7436 .m(m)
7437 .n(n)
7438 .k(k)
7439 .iterations(1)
7440 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7441 }
7442 }
7443 }
7444 }
7445
7446 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4) {
7447 TEST_REQUIRES_X86_AVX;
7448 for (uint32_t n = 5; n < 8; n++) {
7449 for (size_t k = 1; k <= 40; k += 9) {
7450 GemmMicrokernelTester()
7451 .mr(3)
7452 .nr(4)
7453 .kr(2)
7454 .sr(1)
7455 .m(3)
7456 .n(4)
7457 .k(k)
7458 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7459 }
7460 }
7461 }
7462
7463 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_strided_cn) {
7464 TEST_REQUIRES_X86_AVX;
7465 for (uint32_t n = 5; n < 8; n++) {
7466 for (size_t k = 1; k <= 40; k += 9) {
7467 GemmMicrokernelTester()
7468 .mr(3)
7469 .nr(4)
7470 .kr(2)
7471 .sr(1)
7472 .m(3)
7473 .n(4)
7474 .k(k)
7475 .cn_stride(7)
7476 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7477 }
7478 }
7479 }
7480
7481 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_subtile) {
7482 TEST_REQUIRES_X86_AVX;
7483 for (uint32_t n = 5; n < 8; n++) {
7484 for (size_t k = 1; k <= 40; k += 9) {
7485 for (uint32_t m = 1; m <= 3; m++) {
7486 GemmMicrokernelTester()
7487 .mr(3)
7488 .nr(4)
7489 .kr(2)
7490 .sr(1)
7491 .m(m)
7492 .n(n)
7493 .k(k)
7494 .iterations(1)
7495 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7496 }
7497 }
7498 }
7499 }
7500
7501 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4) {
7502 TEST_REQUIRES_X86_AVX;
7503 for (uint32_t n = 8; n <= 12; n += 4) {
7504 for (size_t k = 1; k <= 40; k += 9) {
7505 GemmMicrokernelTester()
7506 .mr(3)
7507 .nr(4)
7508 .kr(2)
7509 .sr(1)
7510 .m(3)
7511 .n(4)
7512 .k(k)
7513 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7514 }
7515 }
7516 }
7517
7518 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_strided_cn) {
7519 TEST_REQUIRES_X86_AVX;
7520 for (uint32_t n = 8; n <= 12; n += 4) {
7521 for (size_t k = 1; k <= 40; k += 9) {
7522 GemmMicrokernelTester()
7523 .mr(3)
7524 .nr(4)
7525 .kr(2)
7526 .sr(1)
7527 .m(3)
7528 .n(n)
7529 .k(k)
7530 .cn_stride(7)
7531 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7532 }
7533 }
7534 }
7535
7536 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_subtile) {
7537 TEST_REQUIRES_X86_AVX;
7538 for (uint32_t n = 8; n <= 12; n += 4) {
7539 for (size_t k = 1; k <= 40; k += 9) {
7540 for (uint32_t m = 1; m <= 3; m++) {
7541 GemmMicrokernelTester()
7542 .mr(3)
7543 .nr(4)
7544 .kr(2)
7545 .sr(1)
7546 .m(m)
7547 .n(n)
7548 .k(k)
7549 .iterations(1)
7550 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7551 }
7552 }
7553 }
7554 }
7555
7556 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, small_kernel) {
7557 TEST_REQUIRES_X86_AVX;
7558 for (size_t k = 1; k <= 40; k += 9) {
7559 GemmMicrokernelTester()
7560 .mr(3)
7561 .nr(4)
7562 .kr(2)
7563 .sr(1)
7564 .m(3)
7565 .n(4)
7566 .k(k)
7567 .ks(3)
7568 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7569 }
7570 }
7571
7572 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, small_kernel_subtile) {
7573 TEST_REQUIRES_X86_AVX;
7574 for (size_t k = 1; k <= 40; k += 9) {
7575 for (uint32_t m = 1; m <= 3; m++) {
7576 for (uint32_t n = 1; n <= 4; n++) {
7577 GemmMicrokernelTester()
7578 .mr(3)
7579 .nr(4)
7580 .kr(2)
7581 .sr(1)
7582 .m(m)
7583 .n(n)
7584 .k(k)
7585 .ks(3)
7586 .iterations(1)
7587 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7588 }
7589 }
7590 }
7591 }
7592
7593 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_small_kernel) {
7594 TEST_REQUIRES_X86_AVX;
7595 for (uint32_t n = 5; n < 8; n++) {
7596 for (size_t k = 1; k <= 40; k += 9) {
7597 GemmMicrokernelTester()
7598 .mr(3)
7599 .nr(4)
7600 .kr(2)
7601 .sr(1)
7602 .m(3)
7603 .n(4)
7604 .k(k)
7605 .ks(3)
7606 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7607 }
7608 }
7609 }
7610
7611 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_small_kernel) {
7612 TEST_REQUIRES_X86_AVX;
7613 for (uint32_t n = 8; n <= 12; n += 4) {
7614 for (size_t k = 1; k <= 40; k += 9) {
7615 GemmMicrokernelTester()
7616 .mr(3)
7617 .nr(4)
7618 .kr(2)
7619 .sr(1)
7620 .m(3)
7621 .n(4)
7622 .k(k)
7623 .ks(3)
7624 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7625 }
7626 }
7627 }
7628
7629 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cm_subtile) {
7630 TEST_REQUIRES_X86_AVX;
7631 for (size_t k = 1; k <= 40; k += 9) {
7632 for (uint32_t m = 1; m <= 3; m++) {
7633 for (uint32_t n = 1; n <= 4; n++) {
7634 GemmMicrokernelTester()
7635 .mr(3)
7636 .nr(4)
7637 .kr(2)
7638 .sr(1)
7639 .m(m)
7640 .n(n)
7641 .k(k)
7642 .cm_stride(7)
7643 .iterations(1)
7644 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7645 }
7646 }
7647 }
7648 }
7649
7650 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, a_offset) {
7651 TEST_REQUIRES_X86_AVX;
7652 for (size_t k = 1; k <= 40; k += 9) {
7653 GemmMicrokernelTester()
7654 .mr(3)
7655 .nr(4)
7656 .kr(2)
7657 .sr(1)
7658 .m(3)
7659 .n(4)
7660 .k(k)
7661 .ks(3)
7662 .a_offset(127)
7663 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7664 }
7665 }
7666
7667 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, zero) {
7668 TEST_REQUIRES_X86_AVX;
7669 for (uint32_t mz = 0; mz < 3; mz++) {
7670 for (size_t k = 1; k <= 40; k += 9) {
7671 GemmMicrokernelTester()
7672 .mr(3)
7673 .nr(4)
7674 .kr(2)
7675 .sr(1)
7676 .m(3)
7677 .n(4)
7678 .k(k)
7679 .ks(3)
7680 .a_offset(127)
7681 .zero_index(mz)
7682 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7683 }
7684 }
7685 }
7686
7687 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, qmin) {
7688 TEST_REQUIRES_X86_AVX;
7689 GemmMicrokernelTester()
7690 .mr(3)
7691 .nr(4)
7692 .kr(2)
7693 .sr(1)
7694 .m(3)
7695 .n(4)
7696 .k(8)
7697 .qmin(128)
7698 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7699 }
7700
7701 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, qmax) {
7702 TEST_REQUIRES_X86_AVX;
7703 GemmMicrokernelTester()
7704 .mr(3)
7705 .nr(4)
7706 .kr(2)
7707 .sr(1)
7708 .m(3)
7709 .n(4)
7710 .k(8)
7711 .qmax(128)
7712 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7713 }
7714
7715 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cm) {
7716 TEST_REQUIRES_X86_AVX;
7717 GemmMicrokernelTester()
7718 .mr(3)
7719 .nr(4)
7720 .kr(2)
7721 .sr(1)
7722 .m(3)
7723 .n(4)
7724 .k(8)
7725 .cm_stride(7)
7726 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7727 }
7728
7729 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, no_a_zero_point) {
7730 TEST_REQUIRES_X86_AVX;
7731 for (size_t k = 1; k <= 40; k += 9) {
7732 GemmMicrokernelTester()
7733 .mr(3)
7734 .nr(4)
7735 .kr(2)
7736 .sr(1)
7737 .m(3)
7738 .n(4)
7739 .k(k)
7740 .a_zero_point(0)
7741 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7742 }
7743 }
7744
7745 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, no_b_zero_point) {
7746 TEST_REQUIRES_X86_AVX;
7747 for (size_t k = 1; k <= 40; k += 9) {
7748 GemmMicrokernelTester()
7749 .mr(3)
7750 .nr(4)
7751 .kr(2)
7752 .sr(1)
7753 .m(3)
7754 .n(4)
7755 .k(k)
7756 .b_zero_point(0)
7757 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7758 }
7759 }
7760
7761 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, no_zero_point) {
7762 TEST_REQUIRES_X86_AVX;
7763 for (size_t k = 1; k <= 40; k += 9) {
7764 GemmMicrokernelTester()
7765 .mr(3)
7766 .nr(4)
7767 .kr(2)
7768 .sr(1)
7769 .m(3)
7770 .n(4)
7771 .k(k)
7772 .a_zero_point(0)
7773 .b_zero_point(0)
7774 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7775 }
7776 }
7777#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7778
7779
7780#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7781 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8) {
7782 TEST_REQUIRES_X86_AVX;
7783 GemmMicrokernelTester()
7784 .mr(4)
7785 .nr(4)
7786 .kr(2)
7787 .sr(1)
7788 .m(4)
7789 .n(4)
7790 .k(8)
7791 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7792 }
7793
7794 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cn) {
7795 TEST_REQUIRES_X86_AVX;
7796 GemmMicrokernelTester()
7797 .mr(4)
7798 .nr(4)
7799 .kr(2)
7800 .sr(1)
7801 .m(4)
7802 .n(4)
7803 .k(8)
7804 .cn_stride(7)
7805 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7806 }
7807
7808 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile) {
7809 TEST_REQUIRES_X86_AVX;
7810 for (uint32_t m = 1; m <= 4; m++) {
7811 for (uint32_t n = 1; n <= 4; n++) {
7812 GemmMicrokernelTester()
7813 .mr(4)
7814 .nr(4)
7815 .kr(2)
7816 .sr(1)
7817 .m(m)
7818 .n(n)
7819 .k(8)
7820 .iterations(1)
7821 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7822 }
7823 }
7824 }
7825
7826 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile_m) {
7827 TEST_REQUIRES_X86_AVX;
7828 for (uint32_t m = 1; m <= 4; m++) {
7829 GemmMicrokernelTester()
7830 .mr(4)
7831 .nr(4)
7832 .kr(2)
7833 .sr(1)
7834 .m(m)
7835 .n(4)
7836 .k(8)
7837 .iterations(1)
7838 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7839 }
7840 }
7841
7842 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile_n) {
7843 TEST_REQUIRES_X86_AVX;
7844 for (uint32_t n = 1; n <= 4; n++) {
7845 GemmMicrokernelTester()
7846 .mr(4)
7847 .nr(4)
7848 .kr(2)
7849 .sr(1)
7850 .m(4)
7851 .n(n)
7852 .k(8)
7853 .iterations(1)
7854 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7855 }
7856 }
7857
7858 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8) {
7859 TEST_REQUIRES_X86_AVX;
7860 for (size_t k = 1; k < 8; k++) {
7861 GemmMicrokernelTester()
7862 .mr(4)
7863 .nr(4)
7864 .kr(2)
7865 .sr(1)
7866 .m(4)
7867 .n(4)
7868 .k(k)
7869 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7870 }
7871 }
7872
7873 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8_subtile) {
7874 TEST_REQUIRES_X86_AVX;
7875 for (size_t k = 1; k < 8; k++) {
7876 for (uint32_t m = 1; m <= 4; m++) {
7877 for (uint32_t n = 1; n <= 4; n++) {
7878 GemmMicrokernelTester()
7879 .mr(4)
7880 .nr(4)
7881 .kr(2)
7882 .sr(1)
7883 .m(m)
7884 .n(n)
7885 .k(k)
7886 .iterations(1)
7887 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7888 }
7889 }
7890 }
7891 }
7892
7893 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8) {
7894 TEST_REQUIRES_X86_AVX;
7895 for (size_t k = 9; k < 16; k++) {
7896 GemmMicrokernelTester()
7897 .mr(4)
7898 .nr(4)
7899 .kr(2)
7900 .sr(1)
7901 .m(4)
7902 .n(4)
7903 .k(k)
7904 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7905 }
7906 }
7907
7908 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8_subtile) {
7909 TEST_REQUIRES_X86_AVX;
7910 for (size_t k = 9; k < 16; k++) {
7911 for (uint32_t m = 1; m <= 4; m++) {
7912 for (uint32_t n = 1; n <= 4; n++) {
7913 GemmMicrokernelTester()
7914 .mr(4)
7915 .nr(4)
7916 .kr(2)
7917 .sr(1)
7918 .m(m)
7919 .n(n)
7920 .k(k)
7921 .iterations(1)
7922 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7923 }
7924 }
7925 }
7926 }
7927
7928 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8) {
7929 TEST_REQUIRES_X86_AVX;
7930 for (size_t k = 16; k <= 80; k += 8) {
7931 GemmMicrokernelTester()
7932 .mr(4)
7933 .nr(4)
7934 .kr(2)
7935 .sr(1)
7936 .m(4)
7937 .n(4)
7938 .k(k)
7939 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7940 }
7941 }
7942
7943 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8_subtile) {
7944 TEST_REQUIRES_X86_AVX;
7945 for (size_t k = 16; k <= 80; k += 8) {
7946 for (uint32_t m = 1; m <= 4; m++) {
7947 for (uint32_t n = 1; n <= 4; n++) {
7948 GemmMicrokernelTester()
7949 .mr(4)
7950 .nr(4)
7951 .kr(2)
7952 .sr(1)
7953 .m(m)
7954 .n(n)
7955 .k(k)
7956 .iterations(1)
7957 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7958 }
7959 }
7960 }
7961 }
7962
7963 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4) {
7964 TEST_REQUIRES_X86_AVX;
7965 for (uint32_t n = 5; n < 8; n++) {
7966 for (size_t k = 1; k <= 40; k += 9) {
7967 GemmMicrokernelTester()
7968 .mr(4)
7969 .nr(4)
7970 .kr(2)
7971 .sr(1)
7972 .m(4)
7973 .n(4)
7974 .k(k)
7975 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7976 }
7977 }
7978 }
7979
7980 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_strided_cn) {
7981 TEST_REQUIRES_X86_AVX;
7982 for (uint32_t n = 5; n < 8; n++) {
7983 for (size_t k = 1; k <= 40; k += 9) {
7984 GemmMicrokernelTester()
7985 .mr(4)
7986 .nr(4)
7987 .kr(2)
7988 .sr(1)
7989 .m(4)
7990 .n(4)
7991 .k(k)
7992 .cn_stride(7)
7993 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
7994 }
7995 }
7996 }
7997
7998 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_subtile) {
7999 TEST_REQUIRES_X86_AVX;
8000 for (uint32_t n = 5; n < 8; n++) {
8001 for (size_t k = 1; k <= 40; k += 9) {
8002 for (uint32_t m = 1; m <= 4; m++) {
8003 GemmMicrokernelTester()
8004 .mr(4)
8005 .nr(4)
8006 .kr(2)
8007 .sr(1)
8008 .m(m)
8009 .n(n)
8010 .k(k)
8011 .iterations(1)
8012 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8013 }
8014 }
8015 }
8016 }
8017
8018 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4) {
8019 TEST_REQUIRES_X86_AVX;
8020 for (uint32_t n = 8; n <= 12; n += 4) {
8021 for (size_t k = 1; k <= 40; k += 9) {
8022 GemmMicrokernelTester()
8023 .mr(4)
8024 .nr(4)
8025 .kr(2)
8026 .sr(1)
8027 .m(4)
8028 .n(4)
8029 .k(k)
8030 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8031 }
8032 }
8033 }
8034
8035 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_strided_cn) {
8036 TEST_REQUIRES_X86_AVX;
8037 for (uint32_t n = 8; n <= 12; n += 4) {
8038 for (size_t k = 1; k <= 40; k += 9) {
8039 GemmMicrokernelTester()
8040 .mr(4)
8041 .nr(4)
8042 .kr(2)
8043 .sr(1)
8044 .m(4)
8045 .n(n)
8046 .k(k)
8047 .cn_stride(7)
8048 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8049 }
8050 }
8051 }
8052
8053 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_subtile) {
8054 TEST_REQUIRES_X86_AVX;
8055 for (uint32_t n = 8; n <= 12; n += 4) {
8056 for (size_t k = 1; k <= 40; k += 9) {
8057 for (uint32_t m = 1; m <= 4; m++) {
8058 GemmMicrokernelTester()
8059 .mr(4)
8060 .nr(4)
8061 .kr(2)
8062 .sr(1)
8063 .m(m)
8064 .n(n)
8065 .k(k)
8066 .iterations(1)
8067 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8068 }
8069 }
8070 }
8071 }
8072
8073 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, small_kernel) {
8074 TEST_REQUIRES_X86_AVX;
8075 for (size_t k = 1; k <= 40; k += 9) {
8076 GemmMicrokernelTester()
8077 .mr(4)
8078 .nr(4)
8079 .kr(2)
8080 .sr(1)
8081 .m(4)
8082 .n(4)
8083 .k(k)
8084 .ks(3)
8085 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8086 }
8087 }
8088
8089 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, small_kernel_subtile) {
8090 TEST_REQUIRES_X86_AVX;
8091 for (size_t k = 1; k <= 40; k += 9) {
8092 for (uint32_t m = 1; m <= 4; m++) {
8093 for (uint32_t n = 1; n <= 4; n++) {
8094 GemmMicrokernelTester()
8095 .mr(4)
8096 .nr(4)
8097 .kr(2)
8098 .sr(1)
8099 .m(m)
8100 .n(n)
8101 .k(k)
8102 .ks(3)
8103 .iterations(1)
8104 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8105 }
8106 }
8107 }
8108 }
8109
8110 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_small_kernel) {
8111 TEST_REQUIRES_X86_AVX;
8112 for (uint32_t n = 5; n < 8; n++) {
8113 for (size_t k = 1; k <= 40; k += 9) {
8114 GemmMicrokernelTester()
8115 .mr(4)
8116 .nr(4)
8117 .kr(2)
8118 .sr(1)
8119 .m(4)
8120 .n(4)
8121 .k(k)
8122 .ks(3)
8123 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8124 }
8125 }
8126 }
8127
8128 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_small_kernel) {
8129 TEST_REQUIRES_X86_AVX;
8130 for (uint32_t n = 8; n <= 12; n += 4) {
8131 for (size_t k = 1; k <= 40; k += 9) {
8132 GemmMicrokernelTester()
8133 .mr(4)
8134 .nr(4)
8135 .kr(2)
8136 .sr(1)
8137 .m(4)
8138 .n(4)
8139 .k(k)
8140 .ks(3)
8141 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8142 }
8143 }
8144 }
8145
8146 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cm_subtile) {
8147 TEST_REQUIRES_X86_AVX;
8148 for (size_t k = 1; k <= 40; k += 9) {
8149 for (uint32_t m = 1; m <= 4; m++) {
8150 for (uint32_t n = 1; n <= 4; n++) {
8151 GemmMicrokernelTester()
8152 .mr(4)
8153 .nr(4)
8154 .kr(2)
8155 .sr(1)
8156 .m(m)
8157 .n(n)
8158 .k(k)
8159 .cm_stride(7)
8160 .iterations(1)
8161 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8162 }
8163 }
8164 }
8165 }
8166
8167 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, a_offset) {
8168 TEST_REQUIRES_X86_AVX;
8169 for (size_t k = 1; k <= 40; k += 9) {
8170 GemmMicrokernelTester()
8171 .mr(4)
8172 .nr(4)
8173 .kr(2)
8174 .sr(1)
8175 .m(4)
8176 .n(4)
8177 .k(k)
8178 .ks(3)
8179 .a_offset(163)
8180 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8181 }
8182 }
8183
8184 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, zero) {
8185 TEST_REQUIRES_X86_AVX;
8186 for (uint32_t mz = 0; mz < 4; mz++) {
8187 for (size_t k = 1; k <= 40; k += 9) {
8188 GemmMicrokernelTester()
8189 .mr(4)
8190 .nr(4)
8191 .kr(2)
8192 .sr(1)
8193 .m(4)
8194 .n(4)
8195 .k(k)
8196 .ks(3)
8197 .a_offset(163)
8198 .zero_index(mz)
8199 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8200 }
8201 }
8202 }
8203
8204 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, qmin) {
8205 TEST_REQUIRES_X86_AVX;
8206 GemmMicrokernelTester()
8207 .mr(4)
8208 .nr(4)
8209 .kr(2)
8210 .sr(1)
8211 .m(4)
8212 .n(4)
8213 .k(8)
8214 .qmin(128)
8215 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8216 }
8217
8218 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, qmax) {
8219 TEST_REQUIRES_X86_AVX;
8220 GemmMicrokernelTester()
8221 .mr(4)
8222 .nr(4)
8223 .kr(2)
8224 .sr(1)
8225 .m(4)
8226 .n(4)
8227 .k(8)
8228 .qmax(128)
8229 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8230 }
8231
8232 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cm) {
8233 TEST_REQUIRES_X86_AVX;
8234 GemmMicrokernelTester()
8235 .mr(4)
8236 .nr(4)
8237 .kr(2)
8238 .sr(1)
8239 .m(4)
8240 .n(4)
8241 .k(8)
8242 .cm_stride(7)
8243 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8244 }
8245
8246 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, no_a_zero_point) {
8247 TEST_REQUIRES_X86_AVX;
8248 for (size_t k = 1; k <= 40; k += 9) {
8249 GemmMicrokernelTester()
8250 .mr(4)
8251 .nr(4)
8252 .kr(2)
8253 .sr(1)
8254 .m(4)
8255 .n(4)
8256 .k(k)
8257 .a_zero_point(0)
8258 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8259 }
8260 }
8261
8262 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, no_b_zero_point) {
8263 TEST_REQUIRES_X86_AVX;
8264 for (size_t k = 1; k <= 40; k += 9) {
8265 GemmMicrokernelTester()
8266 .mr(4)
8267 .nr(4)
8268 .kr(2)
8269 .sr(1)
8270 .m(4)
8271 .n(4)
8272 .k(k)
8273 .b_zero_point(0)
8274 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8275 }
8276 }
8277
8278 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, no_zero_point) {
8279 TEST_REQUIRES_X86_AVX;
8280 for (size_t k = 1; k <= 40; k += 9) {
8281 GemmMicrokernelTester()
8282 .mr(4)
8283 .nr(4)
8284 .kr(2)
8285 .sr(1)
8286 .m(4)
8287 .n(4)
8288 .k(k)
8289 .a_zero_point(0)
8290 .b_zero_point(0)
8291 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8292 }
8293 }
8294#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8295
8296
8297#if XNN_ARCH_X86 || XNN_ARCH_X86_64
8298 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_eq_8) {
8299 TEST_REQUIRES_X86_XOP;
8300 GemmMicrokernelTester()
8301 .mr(1)
8302 .nr(4)
8303 .kr(2)
8304 .sr(1)
8305 .m(1)
8306 .n(4)
8307 .k(8)
8308 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8309 }
8310
8311 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, strided_cn) {
8312 TEST_REQUIRES_X86_XOP;
8313 GemmMicrokernelTester()
8314 .mr(1)
8315 .nr(4)
8316 .kr(2)
8317 .sr(1)
8318 .m(1)
8319 .n(4)
8320 .k(8)
8321 .cn_stride(7)
8322 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8323 }
8324
8325 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_eq_8_subtile) {
8326 TEST_REQUIRES_X86_XOP;
8327 for (uint32_t m = 1; m <= 1; m++) {
8328 for (uint32_t n = 1; n <= 4; n++) {
8329 GemmMicrokernelTester()
8330 .mr(1)
8331 .nr(4)
8332 .kr(2)
8333 .sr(1)
8334 .m(m)
8335 .n(n)
8336 .k(8)
8337 .iterations(1)
8338 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8339 }
8340 }
8341 }
8342
8343 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_eq_8_subtile_m) {
8344 TEST_REQUIRES_X86_XOP;
8345 for (uint32_t m = 1; m <= 1; m++) {
8346 GemmMicrokernelTester()
8347 .mr(1)
8348 .nr(4)
8349 .kr(2)
8350 .sr(1)
8351 .m(m)
8352 .n(4)
8353 .k(8)
8354 .iterations(1)
8355 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8356 }
8357 }
8358
8359 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_eq_8_subtile_n) {
8360 TEST_REQUIRES_X86_XOP;
8361 for (uint32_t n = 1; n <= 4; n++) {
8362 GemmMicrokernelTester()
8363 .mr(1)
8364 .nr(4)
8365 .kr(2)
8366 .sr(1)
8367 .m(1)
8368 .n(n)
8369 .k(8)
8370 .iterations(1)
8371 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8372 }
8373 }
8374
8375 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_lt_8) {
8376 TEST_REQUIRES_X86_XOP;
8377 for (size_t k = 1; k < 8; k++) {
8378 GemmMicrokernelTester()
8379 .mr(1)
8380 .nr(4)
8381 .kr(2)
8382 .sr(1)
8383 .m(1)
8384 .n(4)
8385 .k(k)
8386 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8387 }
8388 }
8389
8390 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_lt_8_subtile) {
8391 TEST_REQUIRES_X86_XOP;
8392 for (size_t k = 1; k < 8; k++) {
8393 for (uint32_t m = 1; m <= 1; m++) {
8394 for (uint32_t n = 1; n <= 4; n++) {
8395 GemmMicrokernelTester()
8396 .mr(1)
8397 .nr(4)
8398 .kr(2)
8399 .sr(1)
8400 .m(m)
8401 .n(n)
8402 .k(k)
8403 .iterations(1)
8404 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8405 }
8406 }
8407 }
8408 }
8409
8410 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_gt_8) {
8411 TEST_REQUIRES_X86_XOP;
8412 for (size_t k = 9; k < 16; k++) {
8413 GemmMicrokernelTester()
8414 .mr(1)
8415 .nr(4)
8416 .kr(2)
8417 .sr(1)
8418 .m(1)
8419 .n(4)
8420 .k(k)
8421 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8422 }
8423 }
8424
8425 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_gt_8_subtile) {
8426 TEST_REQUIRES_X86_XOP;
8427 for (size_t k = 9; k < 16; k++) {
8428 for (uint32_t m = 1; m <= 1; m++) {
8429 for (uint32_t n = 1; n <= 4; n++) {
8430 GemmMicrokernelTester()
8431 .mr(1)
8432 .nr(4)
8433 .kr(2)
8434 .sr(1)
8435 .m(m)
8436 .n(n)
8437 .k(k)
8438 .iterations(1)
8439 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8440 }
8441 }
8442 }
8443 }
8444
8445 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_div_8) {
8446 TEST_REQUIRES_X86_XOP;
8447 for (size_t k = 16; k <= 80; k += 8) {
8448 GemmMicrokernelTester()
8449 .mr(1)
8450 .nr(4)
8451 .kr(2)
8452 .sr(1)
8453 .m(1)
8454 .n(4)
8455 .k(k)
8456 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8457 }
8458 }
8459
8460 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_div_8_subtile) {
8461 TEST_REQUIRES_X86_XOP;
8462 for (size_t k = 16; k <= 80; k += 8) {
8463 for (uint32_t m = 1; m <= 1; m++) {
8464 for (uint32_t n = 1; n <= 4; n++) {
8465 GemmMicrokernelTester()
8466 .mr(1)
8467 .nr(4)
8468 .kr(2)
8469 .sr(1)
8470 .m(m)
8471 .n(n)
8472 .k(k)
8473 .iterations(1)
8474 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8475 }
8476 }
8477 }
8478 }
8479
8480 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_gt_4) {
8481 TEST_REQUIRES_X86_XOP;
8482 for (uint32_t n = 5; n < 8; n++) {
8483 for (size_t k = 1; k <= 40; k += 9) {
8484 GemmMicrokernelTester()
8485 .mr(1)
8486 .nr(4)
8487 .kr(2)
8488 .sr(1)
8489 .m(1)
8490 .n(4)
8491 .k(k)
8492 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8493 }
8494 }
8495 }
8496
8497 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_gt_4_strided_cn) {
8498 TEST_REQUIRES_X86_XOP;
8499 for (uint32_t n = 5; n < 8; n++) {
8500 for (size_t k = 1; k <= 40; k += 9) {
8501 GemmMicrokernelTester()
8502 .mr(1)
8503 .nr(4)
8504 .kr(2)
8505 .sr(1)
8506 .m(1)
8507 .n(4)
8508 .k(k)
8509 .cn_stride(7)
8510 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8511 }
8512 }
8513 }
8514
8515 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_gt_4_subtile) {
8516 TEST_REQUIRES_X86_XOP;
8517 for (uint32_t n = 5; n < 8; n++) {
8518 for (size_t k = 1; k <= 40; k += 9) {
8519 for (uint32_t m = 1; m <= 1; m++) {
8520 GemmMicrokernelTester()
8521 .mr(1)
8522 .nr(4)
8523 .kr(2)
8524 .sr(1)
8525 .m(m)
8526 .n(n)
8527 .k(k)
8528 .iterations(1)
8529 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8530 }
8531 }
8532 }
8533 }
8534
8535 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_div_4) {
8536 TEST_REQUIRES_X86_XOP;
8537 for (uint32_t n = 8; n <= 12; n += 4) {
8538 for (size_t k = 1; k <= 40; k += 9) {
8539 GemmMicrokernelTester()
8540 .mr(1)
8541 .nr(4)
8542 .kr(2)
8543 .sr(1)
8544 .m(1)
8545 .n(4)
8546 .k(k)
8547 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8548 }
8549 }
8550 }
8551
8552 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_div_4_strided_cn) {
8553 TEST_REQUIRES_X86_XOP;
8554 for (uint32_t n = 8; n <= 12; n += 4) {
8555 for (size_t k = 1; k <= 40; k += 9) {
8556 GemmMicrokernelTester()
8557 .mr(1)
8558 .nr(4)
8559 .kr(2)
8560 .sr(1)
8561 .m(1)
8562 .n(n)
8563 .k(k)
8564 .cn_stride(7)
8565 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8566 }
8567 }
8568 }
8569
8570 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_div_4_subtile) {
8571 TEST_REQUIRES_X86_XOP;
8572 for (uint32_t n = 8; n <= 12; n += 4) {
8573 for (size_t k = 1; k <= 40; k += 9) {
8574 for (uint32_t m = 1; m <= 1; m++) {
8575 GemmMicrokernelTester()
8576 .mr(1)
8577 .nr(4)
8578 .kr(2)
8579 .sr(1)
8580 .m(m)
8581 .n(n)
8582 .k(k)
8583 .iterations(1)
8584 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8585 }
8586 }
8587 }
8588 }
8589
8590 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, small_kernel) {
8591 TEST_REQUIRES_X86_XOP;
8592 for (size_t k = 1; k <= 40; k += 9) {
8593 GemmMicrokernelTester()
8594 .mr(1)
8595 .nr(4)
8596 .kr(2)
8597 .sr(1)
8598 .m(1)
8599 .n(4)
8600 .k(k)
8601 .ks(3)
8602 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8603 }
8604 }
8605
8606 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, small_kernel_subtile) {
8607 TEST_REQUIRES_X86_XOP;
8608 for (size_t k = 1; k <= 40; k += 9) {
8609 for (uint32_t m = 1; m <= 1; m++) {
8610 for (uint32_t n = 1; n <= 4; n++) {
8611 GemmMicrokernelTester()
8612 .mr(1)
8613 .nr(4)
8614 .kr(2)
8615 .sr(1)
8616 .m(m)
8617 .n(n)
8618 .k(k)
8619 .ks(3)
8620 .iterations(1)
8621 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8622 }
8623 }
8624 }
8625 }
8626
8627 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_gt_4_small_kernel) {
8628 TEST_REQUIRES_X86_XOP;
8629 for (uint32_t n = 5; n < 8; n++) {
8630 for (size_t k = 1; k <= 40; k += 9) {
8631 GemmMicrokernelTester()
8632 .mr(1)
8633 .nr(4)
8634 .kr(2)
8635 .sr(1)
8636 .m(1)
8637 .n(4)
8638 .k(k)
8639 .ks(3)
8640 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8641 }
8642 }
8643 }
8644
8645 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_div_4_small_kernel) {
8646 TEST_REQUIRES_X86_XOP;
8647 for (uint32_t n = 8; n <= 12; n += 4) {
8648 for (size_t k = 1; k <= 40; k += 9) {
8649 GemmMicrokernelTester()
8650 .mr(1)
8651 .nr(4)
8652 .kr(2)
8653 .sr(1)
8654 .m(1)
8655 .n(4)
8656 .k(k)
8657 .ks(3)
8658 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8659 }
8660 }
8661 }
8662
8663 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, strided_cm_subtile) {
8664 TEST_REQUIRES_X86_XOP;
8665 for (size_t k = 1; k <= 40; k += 9) {
8666 for (uint32_t m = 1; m <= 1; m++) {
8667 for (uint32_t n = 1; n <= 4; n++) {
8668 GemmMicrokernelTester()
8669 .mr(1)
8670 .nr(4)
8671 .kr(2)
8672 .sr(1)
8673 .m(m)
8674 .n(n)
8675 .k(k)
8676 .cm_stride(7)
8677 .iterations(1)
8678 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8679 }
8680 }
8681 }
8682 }
8683
8684 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, a_offset) {
8685 TEST_REQUIRES_X86_XOP;
8686 for (size_t k = 1; k <= 40; k += 9) {
8687 GemmMicrokernelTester()
8688 .mr(1)
8689 .nr(4)
8690 .kr(2)
8691 .sr(1)
8692 .m(1)
8693 .n(4)
8694 .k(k)
8695 .ks(3)
8696 .a_offset(43)
8697 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8698 }
8699 }
8700
8701 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, zero) {
8702 TEST_REQUIRES_X86_XOP;
8703 for (uint32_t mz = 0; mz < 1; mz++) {
8704 for (size_t k = 1; k <= 40; k += 9) {
8705 GemmMicrokernelTester()
8706 .mr(1)
8707 .nr(4)
8708 .kr(2)
8709 .sr(1)
8710 .m(1)
8711 .n(4)
8712 .k(k)
8713 .ks(3)
8714 .a_offset(43)
8715 .zero_index(mz)
8716 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8717 }
8718 }
8719 }
8720
8721 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, qmin) {
8722 TEST_REQUIRES_X86_XOP;
8723 GemmMicrokernelTester()
8724 .mr(1)
8725 .nr(4)
8726 .kr(2)
8727 .sr(1)
8728 .m(1)
8729 .n(4)
8730 .k(8)
8731 .qmin(128)
8732 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8733 }
8734
8735 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, qmax) {
8736 TEST_REQUIRES_X86_XOP;
8737 GemmMicrokernelTester()
8738 .mr(1)
8739 .nr(4)
8740 .kr(2)
8741 .sr(1)
8742 .m(1)
8743 .n(4)
8744 .k(8)
8745 .qmax(128)
8746 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8747 }
8748
8749 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, strided_cm) {
8750 TEST_REQUIRES_X86_XOP;
8751 GemmMicrokernelTester()
8752 .mr(1)
8753 .nr(4)
8754 .kr(2)
8755 .sr(1)
8756 .m(1)
8757 .n(4)
8758 .k(8)
8759 .cm_stride(7)
8760 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8761 }
8762
8763 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, no_a_zero_point) {
8764 TEST_REQUIRES_X86_XOP;
8765 for (size_t k = 1; k <= 40; k += 9) {
8766 GemmMicrokernelTester()
8767 .mr(1)
8768 .nr(4)
8769 .kr(2)
8770 .sr(1)
8771 .m(1)
8772 .n(4)
8773 .k(k)
8774 .a_zero_point(0)
8775 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8776 }
8777 }
8778
8779 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, no_b_zero_point) {
8780 TEST_REQUIRES_X86_XOP;
8781 for (size_t k = 1; k <= 40; k += 9) {
8782 GemmMicrokernelTester()
8783 .mr(1)
8784 .nr(4)
8785 .kr(2)
8786 .sr(1)
8787 .m(1)
8788 .n(4)
8789 .k(k)
8790 .b_zero_point(0)
8791 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8792 }
8793 }
8794
8795 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, no_zero_point) {
8796 TEST_REQUIRES_X86_XOP;
8797 for (size_t k = 1; k <= 40; k += 9) {
8798 GemmMicrokernelTester()
8799 .mr(1)
8800 .nr(4)
8801 .kr(2)
8802 .sr(1)
8803 .m(1)
8804 .n(4)
8805 .k(k)
8806 .a_zero_point(0)
8807 .b_zero_point(0)
8808 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8809 }
8810 }
8811#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8812
8813
8814#if XNN_ARCH_X86 || XNN_ARCH_X86_64
8815 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8) {
8816 TEST_REQUIRES_X86_XOP;
8817 GemmMicrokernelTester()
8818 .mr(2)
8819 .nr(4)
8820 .kr(2)
8821 .sr(1)
8822 .m(2)
8823 .n(4)
8824 .k(8)
8825 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8826 }
8827
8828 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cn) {
8829 TEST_REQUIRES_X86_XOP;
8830 GemmMicrokernelTester()
8831 .mr(2)
8832 .nr(4)
8833 .kr(2)
8834 .sr(1)
8835 .m(2)
8836 .n(4)
8837 .k(8)
8838 .cn_stride(7)
8839 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8840 }
8841
8842 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile) {
8843 TEST_REQUIRES_X86_XOP;
8844 for (uint32_t m = 1; m <= 2; m++) {
8845 for (uint32_t n = 1; n <= 4; n++) {
8846 GemmMicrokernelTester()
8847 .mr(2)
8848 .nr(4)
8849 .kr(2)
8850 .sr(1)
8851 .m(m)
8852 .n(n)
8853 .k(8)
8854 .iterations(1)
8855 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8856 }
8857 }
8858 }
8859
8860 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile_m) {
8861 TEST_REQUIRES_X86_XOP;
8862 for (uint32_t m = 1; m <= 2; m++) {
8863 GemmMicrokernelTester()
8864 .mr(2)
8865 .nr(4)
8866 .kr(2)
8867 .sr(1)
8868 .m(m)
8869 .n(4)
8870 .k(8)
8871 .iterations(1)
8872 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8873 }
8874 }
8875
8876 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile_n) {
8877 TEST_REQUIRES_X86_XOP;
8878 for (uint32_t n = 1; n <= 4; n++) {
8879 GemmMicrokernelTester()
8880 .mr(2)
8881 .nr(4)
8882 .kr(2)
8883 .sr(1)
8884 .m(2)
8885 .n(n)
8886 .k(8)
8887 .iterations(1)
8888 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8889 }
8890 }
8891
8892 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8) {
8893 TEST_REQUIRES_X86_XOP;
8894 for (size_t k = 1; k < 8; k++) {
8895 GemmMicrokernelTester()
8896 .mr(2)
8897 .nr(4)
8898 .kr(2)
8899 .sr(1)
8900 .m(2)
8901 .n(4)
8902 .k(k)
8903 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8904 }
8905 }
8906
8907 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8_subtile) {
8908 TEST_REQUIRES_X86_XOP;
8909 for (size_t k = 1; k < 8; k++) {
8910 for (uint32_t m = 1; m <= 2; m++) {
8911 for (uint32_t n = 1; n <= 4; n++) {
8912 GemmMicrokernelTester()
8913 .mr(2)
8914 .nr(4)
8915 .kr(2)
8916 .sr(1)
8917 .m(m)
8918 .n(n)
8919 .k(k)
8920 .iterations(1)
8921 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8922 }
8923 }
8924 }
8925 }
8926
8927 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8) {
8928 TEST_REQUIRES_X86_XOP;
8929 for (size_t k = 9; k < 16; k++) {
8930 GemmMicrokernelTester()
8931 .mr(2)
8932 .nr(4)
8933 .kr(2)
8934 .sr(1)
8935 .m(2)
8936 .n(4)
8937 .k(k)
8938 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8939 }
8940 }
8941
8942 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8_subtile) {
8943 TEST_REQUIRES_X86_XOP;
8944 for (size_t k = 9; k < 16; k++) {
8945 for (uint32_t m = 1; m <= 2; m++) {
8946 for (uint32_t n = 1; n <= 4; n++) {
8947 GemmMicrokernelTester()
8948 .mr(2)
8949 .nr(4)
8950 .kr(2)
8951 .sr(1)
8952 .m(m)
8953 .n(n)
8954 .k(k)
8955 .iterations(1)
8956 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8957 }
8958 }
8959 }
8960 }
8961
8962 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8) {
8963 TEST_REQUIRES_X86_XOP;
8964 for (size_t k = 16; k <= 80; k += 8) {
8965 GemmMicrokernelTester()
8966 .mr(2)
8967 .nr(4)
8968 .kr(2)
8969 .sr(1)
8970 .m(2)
8971 .n(4)
8972 .k(k)
8973 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8974 }
8975 }
8976
8977 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8_subtile) {
8978 TEST_REQUIRES_X86_XOP;
8979 for (size_t k = 16; k <= 80; k += 8) {
8980 for (uint32_t m = 1; m <= 2; m++) {
8981 for (uint32_t n = 1; n <= 4; n++) {
8982 GemmMicrokernelTester()
8983 .mr(2)
8984 .nr(4)
8985 .kr(2)
8986 .sr(1)
8987 .m(m)
8988 .n(n)
8989 .k(k)
8990 .iterations(1)
8991 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
8992 }
8993 }
8994 }
8995 }
8996
8997 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4) {
8998 TEST_REQUIRES_X86_XOP;
8999 for (uint32_t n = 5; n < 8; n++) {
9000 for (size_t k = 1; k <= 40; k += 9) {
9001 GemmMicrokernelTester()
9002 .mr(2)
9003 .nr(4)
9004 .kr(2)
9005 .sr(1)
9006 .m(2)
9007 .n(4)
9008 .k(k)
9009 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9010 }
9011 }
9012 }
9013
9014 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_strided_cn) {
9015 TEST_REQUIRES_X86_XOP;
9016 for (uint32_t n = 5; n < 8; n++) {
9017 for (size_t k = 1; k <= 40; k += 9) {
9018 GemmMicrokernelTester()
9019 .mr(2)
9020 .nr(4)
9021 .kr(2)
9022 .sr(1)
9023 .m(2)
9024 .n(4)
9025 .k(k)
9026 .cn_stride(7)
9027 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9028 }
9029 }
9030 }
9031
9032 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_subtile) {
9033 TEST_REQUIRES_X86_XOP;
9034 for (uint32_t n = 5; n < 8; n++) {
9035 for (size_t k = 1; k <= 40; k += 9) {
9036 for (uint32_t m = 1; m <= 2; m++) {
9037 GemmMicrokernelTester()
9038 .mr(2)
9039 .nr(4)
9040 .kr(2)
9041 .sr(1)
9042 .m(m)
9043 .n(n)
9044 .k(k)
9045 .iterations(1)
9046 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9047 }
9048 }
9049 }
9050 }
9051
9052 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4) {
9053 TEST_REQUIRES_X86_XOP;
9054 for (uint32_t n = 8; n <= 12; n += 4) {
9055 for (size_t k = 1; k <= 40; k += 9) {
9056 GemmMicrokernelTester()
9057 .mr(2)
9058 .nr(4)
9059 .kr(2)
9060 .sr(1)
9061 .m(2)
9062 .n(4)
9063 .k(k)
9064 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9065 }
9066 }
9067 }
9068
9069 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_strided_cn) {
9070 TEST_REQUIRES_X86_XOP;
9071 for (uint32_t n = 8; n <= 12; n += 4) {
9072 for (size_t k = 1; k <= 40; k += 9) {
9073 GemmMicrokernelTester()
9074 .mr(2)
9075 .nr(4)
9076 .kr(2)
9077 .sr(1)
9078 .m(2)
9079 .n(n)
9080 .k(k)
9081 .cn_stride(7)
9082 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9083 }
9084 }
9085 }
9086
9087 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_subtile) {
9088 TEST_REQUIRES_X86_XOP;
9089 for (uint32_t n = 8; n <= 12; n += 4) {
9090 for (size_t k = 1; k <= 40; k += 9) {
9091 for (uint32_t m = 1; m <= 2; m++) {
9092 GemmMicrokernelTester()
9093 .mr(2)
9094 .nr(4)
9095 .kr(2)
9096 .sr(1)
9097 .m(m)
9098 .n(n)
9099 .k(k)
9100 .iterations(1)
9101 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9102 }
9103 }
9104 }
9105 }
9106
9107 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, small_kernel) {
9108 TEST_REQUIRES_X86_XOP;
9109 for (size_t k = 1; k <= 40; k += 9) {
9110 GemmMicrokernelTester()
9111 .mr(2)
9112 .nr(4)
9113 .kr(2)
9114 .sr(1)
9115 .m(2)
9116 .n(4)
9117 .k(k)
9118 .ks(3)
9119 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9120 }
9121 }
9122
9123 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, small_kernel_subtile) {
9124 TEST_REQUIRES_X86_XOP;
9125 for (size_t k = 1; k <= 40; k += 9) {
9126 for (uint32_t m = 1; m <= 2; m++) {
9127 for (uint32_t n = 1; n <= 4; n++) {
9128 GemmMicrokernelTester()
9129 .mr(2)
9130 .nr(4)
9131 .kr(2)
9132 .sr(1)
9133 .m(m)
9134 .n(n)
9135 .k(k)
9136 .ks(3)
9137 .iterations(1)
9138 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9139 }
9140 }
9141 }
9142 }
9143
9144 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_small_kernel) {
9145 TEST_REQUIRES_X86_XOP;
9146 for (uint32_t n = 5; n < 8; n++) {
9147 for (size_t k = 1; k <= 40; k += 9) {
9148 GemmMicrokernelTester()
9149 .mr(2)
9150 .nr(4)
9151 .kr(2)
9152 .sr(1)
9153 .m(2)
9154 .n(4)
9155 .k(k)
9156 .ks(3)
9157 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9158 }
9159 }
9160 }
9161
9162 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_small_kernel) {
9163 TEST_REQUIRES_X86_XOP;
9164 for (uint32_t n = 8; n <= 12; n += 4) {
9165 for (size_t k = 1; k <= 40; k += 9) {
9166 GemmMicrokernelTester()
9167 .mr(2)
9168 .nr(4)
9169 .kr(2)
9170 .sr(1)
9171 .m(2)
9172 .n(4)
9173 .k(k)
9174 .ks(3)
9175 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9176 }
9177 }
9178 }
9179
9180 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cm_subtile) {
9181 TEST_REQUIRES_X86_XOP;
9182 for (size_t k = 1; k <= 40; k += 9) {
9183 for (uint32_t m = 1; m <= 2; m++) {
9184 for (uint32_t n = 1; n <= 4; n++) {
9185 GemmMicrokernelTester()
9186 .mr(2)
9187 .nr(4)
9188 .kr(2)
9189 .sr(1)
9190 .m(m)
9191 .n(n)
9192 .k(k)
9193 .cm_stride(7)
9194 .iterations(1)
9195 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9196 }
9197 }
9198 }
9199 }
9200
9201 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, a_offset) {
9202 TEST_REQUIRES_X86_XOP;
9203 for (size_t k = 1; k <= 40; k += 9) {
9204 GemmMicrokernelTester()
9205 .mr(2)
9206 .nr(4)
9207 .kr(2)
9208 .sr(1)
9209 .m(2)
9210 .n(4)
9211 .k(k)
9212 .ks(3)
9213 .a_offset(83)
9214 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9215 }
9216 }
9217
9218 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, zero) {
9219 TEST_REQUIRES_X86_XOP;
9220 for (uint32_t mz = 0; mz < 2; mz++) {
9221 for (size_t k = 1; k <= 40; k += 9) {
9222 GemmMicrokernelTester()
9223 .mr(2)
9224 .nr(4)
9225 .kr(2)
9226 .sr(1)
9227 .m(2)
9228 .n(4)
9229 .k(k)
9230 .ks(3)
9231 .a_offset(83)
9232 .zero_index(mz)
9233 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9234 }
9235 }
9236 }
9237
9238 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, qmin) {
9239 TEST_REQUIRES_X86_XOP;
9240 GemmMicrokernelTester()
9241 .mr(2)
9242 .nr(4)
9243 .kr(2)
9244 .sr(1)
9245 .m(2)
9246 .n(4)
9247 .k(8)
9248 .qmin(128)
9249 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9250 }
9251
9252 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, qmax) {
9253 TEST_REQUIRES_X86_XOP;
9254 GemmMicrokernelTester()
9255 .mr(2)
9256 .nr(4)
9257 .kr(2)
9258 .sr(1)
9259 .m(2)
9260 .n(4)
9261 .k(8)
9262 .qmax(128)
9263 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9264 }
9265
9266 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cm) {
9267 TEST_REQUIRES_X86_XOP;
9268 GemmMicrokernelTester()
9269 .mr(2)
9270 .nr(4)
9271 .kr(2)
9272 .sr(1)
9273 .m(2)
9274 .n(4)
9275 .k(8)
9276 .cm_stride(7)
9277 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9278 }
9279
9280 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, no_a_zero_point) {
9281 TEST_REQUIRES_X86_XOP;
9282 for (size_t k = 1; k <= 40; k += 9) {
9283 GemmMicrokernelTester()
9284 .mr(2)
9285 .nr(4)
9286 .kr(2)
9287 .sr(1)
9288 .m(2)
9289 .n(4)
9290 .k(k)
9291 .a_zero_point(0)
9292 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9293 }
9294 }
9295
9296 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, no_b_zero_point) {
9297 TEST_REQUIRES_X86_XOP;
9298 for (size_t k = 1; k <= 40; k += 9) {
9299 GemmMicrokernelTester()
9300 .mr(2)
9301 .nr(4)
9302 .kr(2)
9303 .sr(1)
9304 .m(2)
9305 .n(4)
9306 .k(k)
9307 .b_zero_point(0)
9308 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9309 }
9310 }
9311
9312 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, no_zero_point) {
9313 TEST_REQUIRES_X86_XOP;
9314 for (size_t k = 1; k <= 40; k += 9) {
9315 GemmMicrokernelTester()
9316 .mr(2)
9317 .nr(4)
9318 .kr(2)
9319 .sr(1)
9320 .m(2)
9321 .n(4)
9322 .k(k)
9323 .a_zero_point(0)
9324 .b_zero_point(0)
9325 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9326 }
9327 }
9328#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9329
9330
9331#if XNN_ARCH_X86 || XNN_ARCH_X86_64
9332 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8) {
9333 TEST_REQUIRES_X86_XOP;
9334 GemmMicrokernelTester()
9335 .mr(3)
9336 .nr(4)
9337 .kr(2)
9338 .sr(1)
9339 .m(3)
9340 .n(4)
9341 .k(8)
9342 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9343 }
9344
9345 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cn) {
9346 TEST_REQUIRES_X86_XOP;
9347 GemmMicrokernelTester()
9348 .mr(3)
9349 .nr(4)
9350 .kr(2)
9351 .sr(1)
9352 .m(3)
9353 .n(4)
9354 .k(8)
9355 .cn_stride(7)
9356 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9357 }
9358
9359 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile) {
9360 TEST_REQUIRES_X86_XOP;
9361 for (uint32_t m = 1; m <= 3; m++) {
9362 for (uint32_t n = 1; n <= 4; n++) {
9363 GemmMicrokernelTester()
9364 .mr(3)
9365 .nr(4)
9366 .kr(2)
9367 .sr(1)
9368 .m(m)
9369 .n(n)
9370 .k(8)
9371 .iterations(1)
9372 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9373 }
9374 }
9375 }
9376
9377 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile_m) {
9378 TEST_REQUIRES_X86_XOP;
9379 for (uint32_t m = 1; m <= 3; m++) {
9380 GemmMicrokernelTester()
9381 .mr(3)
9382 .nr(4)
9383 .kr(2)
9384 .sr(1)
9385 .m(m)
9386 .n(4)
9387 .k(8)
9388 .iterations(1)
9389 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9390 }
9391 }
9392
9393 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile_n) {
9394 TEST_REQUIRES_X86_XOP;
9395 for (uint32_t n = 1; n <= 4; n++) {
9396 GemmMicrokernelTester()
9397 .mr(3)
9398 .nr(4)
9399 .kr(2)
9400 .sr(1)
9401 .m(3)
9402 .n(n)
9403 .k(8)
9404 .iterations(1)
9405 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9406 }
9407 }
9408
9409 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8) {
9410 TEST_REQUIRES_X86_XOP;
9411 for (size_t k = 1; k < 8; k++) {
9412 GemmMicrokernelTester()
9413 .mr(3)
9414 .nr(4)
9415 .kr(2)
9416 .sr(1)
9417 .m(3)
9418 .n(4)
9419 .k(k)
9420 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9421 }
9422 }
9423
9424 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8_subtile) {
9425 TEST_REQUIRES_X86_XOP;
9426 for (size_t k = 1; k < 8; k++) {
9427 for (uint32_t m = 1; m <= 3; m++) {
9428 for (uint32_t n = 1; n <= 4; n++) {
9429 GemmMicrokernelTester()
9430 .mr(3)
9431 .nr(4)
9432 .kr(2)
9433 .sr(1)
9434 .m(m)
9435 .n(n)
9436 .k(k)
9437 .iterations(1)
9438 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9439 }
9440 }
9441 }
9442 }
9443
9444 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8) {
9445 TEST_REQUIRES_X86_XOP;
9446 for (size_t k = 9; k < 16; k++) {
9447 GemmMicrokernelTester()
9448 .mr(3)
9449 .nr(4)
9450 .kr(2)
9451 .sr(1)
9452 .m(3)
9453 .n(4)
9454 .k(k)
9455 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9456 }
9457 }
9458
9459 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8_subtile) {
9460 TEST_REQUIRES_X86_XOP;
9461 for (size_t k = 9; k < 16; k++) {
9462 for (uint32_t m = 1; m <= 3; m++) {
9463 for (uint32_t n = 1; n <= 4; n++) {
9464 GemmMicrokernelTester()
9465 .mr(3)
9466 .nr(4)
9467 .kr(2)
9468 .sr(1)
9469 .m(m)
9470 .n(n)
9471 .k(k)
9472 .iterations(1)
9473 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9474 }
9475 }
9476 }
9477 }
9478
9479 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8) {
9480 TEST_REQUIRES_X86_XOP;
9481 for (size_t k = 16; k <= 80; k += 8) {
9482 GemmMicrokernelTester()
9483 .mr(3)
9484 .nr(4)
9485 .kr(2)
9486 .sr(1)
9487 .m(3)
9488 .n(4)
9489 .k(k)
9490 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9491 }
9492 }
9493
9494 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8_subtile) {
9495 TEST_REQUIRES_X86_XOP;
9496 for (size_t k = 16; k <= 80; k += 8) {
9497 for (uint32_t m = 1; m <= 3; m++) {
9498 for (uint32_t n = 1; n <= 4; n++) {
9499 GemmMicrokernelTester()
9500 .mr(3)
9501 .nr(4)
9502 .kr(2)
9503 .sr(1)
9504 .m(m)
9505 .n(n)
9506 .k(k)
9507 .iterations(1)
9508 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9509 }
9510 }
9511 }
9512 }
9513
9514 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4) {
9515 TEST_REQUIRES_X86_XOP;
9516 for (uint32_t n = 5; n < 8; n++) {
9517 for (size_t k = 1; k <= 40; k += 9) {
9518 GemmMicrokernelTester()
9519 .mr(3)
9520 .nr(4)
9521 .kr(2)
9522 .sr(1)
9523 .m(3)
9524 .n(4)
9525 .k(k)
9526 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9527 }
9528 }
9529 }
9530
9531 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_strided_cn) {
9532 TEST_REQUIRES_X86_XOP;
9533 for (uint32_t n = 5; n < 8; n++) {
9534 for (size_t k = 1; k <= 40; k += 9) {
9535 GemmMicrokernelTester()
9536 .mr(3)
9537 .nr(4)
9538 .kr(2)
9539 .sr(1)
9540 .m(3)
9541 .n(4)
9542 .k(k)
9543 .cn_stride(7)
9544 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9545 }
9546 }
9547 }
9548
9549 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_subtile) {
9550 TEST_REQUIRES_X86_XOP;
9551 for (uint32_t n = 5; n < 8; n++) {
9552 for (size_t k = 1; k <= 40; k += 9) {
9553 for (uint32_t m = 1; m <= 3; m++) {
9554 GemmMicrokernelTester()
9555 .mr(3)
9556 .nr(4)
9557 .kr(2)
9558 .sr(1)
9559 .m(m)
9560 .n(n)
9561 .k(k)
9562 .iterations(1)
9563 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9564 }
9565 }
9566 }
9567 }
9568
9569 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4) {
9570 TEST_REQUIRES_X86_XOP;
9571 for (uint32_t n = 8; n <= 12; n += 4) {
9572 for (size_t k = 1; k <= 40; k += 9) {
9573 GemmMicrokernelTester()
9574 .mr(3)
9575 .nr(4)
9576 .kr(2)
9577 .sr(1)
9578 .m(3)
9579 .n(4)
9580 .k(k)
9581 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9582 }
9583 }
9584 }
9585
9586 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_strided_cn) {
9587 TEST_REQUIRES_X86_XOP;
9588 for (uint32_t n = 8; n <= 12; n += 4) {
9589 for (size_t k = 1; k <= 40; k += 9) {
9590 GemmMicrokernelTester()
9591 .mr(3)
9592 .nr(4)
9593 .kr(2)
9594 .sr(1)
9595 .m(3)
9596 .n(n)
9597 .k(k)
9598 .cn_stride(7)
9599 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9600 }
9601 }
9602 }
9603
9604 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_subtile) {
9605 TEST_REQUIRES_X86_XOP;
9606 for (uint32_t n = 8; n <= 12; n += 4) {
9607 for (size_t k = 1; k <= 40; k += 9) {
9608 for (uint32_t m = 1; m <= 3; m++) {
9609 GemmMicrokernelTester()
9610 .mr(3)
9611 .nr(4)
9612 .kr(2)
9613 .sr(1)
9614 .m(m)
9615 .n(n)
9616 .k(k)
9617 .iterations(1)
9618 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9619 }
9620 }
9621 }
9622 }
9623
9624 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, small_kernel) {
9625 TEST_REQUIRES_X86_XOP;
9626 for (size_t k = 1; k <= 40; k += 9) {
9627 GemmMicrokernelTester()
9628 .mr(3)
9629 .nr(4)
9630 .kr(2)
9631 .sr(1)
9632 .m(3)
9633 .n(4)
9634 .k(k)
9635 .ks(3)
9636 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9637 }
9638 }
9639
9640 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, small_kernel_subtile) {
9641 TEST_REQUIRES_X86_XOP;
9642 for (size_t k = 1; k <= 40; k += 9) {
9643 for (uint32_t m = 1; m <= 3; m++) {
9644 for (uint32_t n = 1; n <= 4; n++) {
9645 GemmMicrokernelTester()
9646 .mr(3)
9647 .nr(4)
9648 .kr(2)
9649 .sr(1)
9650 .m(m)
9651 .n(n)
9652 .k(k)
9653 .ks(3)
9654 .iterations(1)
9655 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9656 }
9657 }
9658 }
9659 }
9660
9661 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_small_kernel) {
9662 TEST_REQUIRES_X86_XOP;
9663 for (uint32_t n = 5; n < 8; n++) {
9664 for (size_t k = 1; k <= 40; k += 9) {
9665 GemmMicrokernelTester()
9666 .mr(3)
9667 .nr(4)
9668 .kr(2)
9669 .sr(1)
9670 .m(3)
9671 .n(4)
9672 .k(k)
9673 .ks(3)
9674 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9675 }
9676 }
9677 }
9678
9679 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_small_kernel) {
9680 TEST_REQUIRES_X86_XOP;
9681 for (uint32_t n = 8; n <= 12; n += 4) {
9682 for (size_t k = 1; k <= 40; k += 9) {
9683 GemmMicrokernelTester()
9684 .mr(3)
9685 .nr(4)
9686 .kr(2)
9687 .sr(1)
9688 .m(3)
9689 .n(4)
9690 .k(k)
9691 .ks(3)
9692 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9693 }
9694 }
9695 }
9696
9697 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cm_subtile) {
9698 TEST_REQUIRES_X86_XOP;
9699 for (size_t k = 1; k <= 40; k += 9) {
9700 for (uint32_t m = 1; m <= 3; m++) {
9701 for (uint32_t n = 1; n <= 4; n++) {
9702 GemmMicrokernelTester()
9703 .mr(3)
9704 .nr(4)
9705 .kr(2)
9706 .sr(1)
9707 .m(m)
9708 .n(n)
9709 .k(k)
9710 .cm_stride(7)
9711 .iterations(1)
9712 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9713 }
9714 }
9715 }
9716 }
9717
9718 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, a_offset) {
9719 TEST_REQUIRES_X86_XOP;
9720 for (size_t k = 1; k <= 40; k += 9) {
9721 GemmMicrokernelTester()
9722 .mr(3)
9723 .nr(4)
9724 .kr(2)
9725 .sr(1)
9726 .m(3)
9727 .n(4)
9728 .k(k)
9729 .ks(3)
9730 .a_offset(127)
9731 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9732 }
9733 }
9734
9735 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, zero) {
9736 TEST_REQUIRES_X86_XOP;
9737 for (uint32_t mz = 0; mz < 3; mz++) {
9738 for (size_t k = 1; k <= 40; k += 9) {
9739 GemmMicrokernelTester()
9740 .mr(3)
9741 .nr(4)
9742 .kr(2)
9743 .sr(1)
9744 .m(3)
9745 .n(4)
9746 .k(k)
9747 .ks(3)
9748 .a_offset(127)
9749 .zero_index(mz)
9750 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9751 }
9752 }
9753 }
9754
9755 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, qmin) {
9756 TEST_REQUIRES_X86_XOP;
9757 GemmMicrokernelTester()
9758 .mr(3)
9759 .nr(4)
9760 .kr(2)
9761 .sr(1)
9762 .m(3)
9763 .n(4)
9764 .k(8)
9765 .qmin(128)
9766 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9767 }
9768
9769 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, qmax) {
9770 TEST_REQUIRES_X86_XOP;
9771 GemmMicrokernelTester()
9772 .mr(3)
9773 .nr(4)
9774 .kr(2)
9775 .sr(1)
9776 .m(3)
9777 .n(4)
9778 .k(8)
9779 .qmax(128)
9780 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9781 }
9782
9783 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cm) {
9784 TEST_REQUIRES_X86_XOP;
9785 GemmMicrokernelTester()
9786 .mr(3)
9787 .nr(4)
9788 .kr(2)
9789 .sr(1)
9790 .m(3)
9791 .n(4)
9792 .k(8)
9793 .cm_stride(7)
9794 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9795 }
9796
9797 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, no_a_zero_point) {
9798 TEST_REQUIRES_X86_XOP;
9799 for (size_t k = 1; k <= 40; k += 9) {
9800 GemmMicrokernelTester()
9801 .mr(3)
9802 .nr(4)
9803 .kr(2)
9804 .sr(1)
9805 .m(3)
9806 .n(4)
9807 .k(k)
9808 .a_zero_point(0)
9809 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9810 }
9811 }
9812
9813 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, no_b_zero_point) {
9814 TEST_REQUIRES_X86_XOP;
9815 for (size_t k = 1; k <= 40; k += 9) {
9816 GemmMicrokernelTester()
9817 .mr(3)
9818 .nr(4)
9819 .kr(2)
9820 .sr(1)
9821 .m(3)
9822 .n(4)
9823 .k(k)
9824 .b_zero_point(0)
9825 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9826 }
9827 }
9828
9829 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, no_zero_point) {
9830 TEST_REQUIRES_X86_XOP;
9831 for (size_t k = 1; k <= 40; k += 9) {
9832 GemmMicrokernelTester()
9833 .mr(3)
9834 .nr(4)
9835 .kr(2)
9836 .sr(1)
9837 .m(3)
9838 .n(4)
9839 .k(k)
9840 .a_zero_point(0)
9841 .b_zero_point(0)
9842 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9843 }
9844 }
9845#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9846
9847
9848#if XNN_ARCH_X86 || XNN_ARCH_X86_64
9849 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8) {
9850 TEST_REQUIRES_X86_XOP;
9851 GemmMicrokernelTester()
9852 .mr(4)
9853 .nr(4)
9854 .kr(2)
9855 .sr(1)
9856 .m(4)
9857 .n(4)
9858 .k(8)
9859 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9860 }
9861
9862 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cn) {
9863 TEST_REQUIRES_X86_XOP;
9864 GemmMicrokernelTester()
9865 .mr(4)
9866 .nr(4)
9867 .kr(2)
9868 .sr(1)
9869 .m(4)
9870 .n(4)
9871 .k(8)
9872 .cn_stride(7)
9873 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9874 }
9875
9876 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile) {
9877 TEST_REQUIRES_X86_XOP;
9878 for (uint32_t m = 1; m <= 4; m++) {
9879 for (uint32_t n = 1; n <= 4; n++) {
9880 GemmMicrokernelTester()
9881 .mr(4)
9882 .nr(4)
9883 .kr(2)
9884 .sr(1)
9885 .m(m)
9886 .n(n)
9887 .k(8)
9888 .iterations(1)
9889 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9890 }
9891 }
9892 }
9893
9894 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile_m) {
9895 TEST_REQUIRES_X86_XOP;
9896 for (uint32_t m = 1; m <= 4; m++) {
9897 GemmMicrokernelTester()
9898 .mr(4)
9899 .nr(4)
9900 .kr(2)
9901 .sr(1)
9902 .m(m)
9903 .n(4)
9904 .k(8)
9905 .iterations(1)
9906 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9907 }
9908 }
9909
9910 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile_n) {
9911 TEST_REQUIRES_X86_XOP;
9912 for (uint32_t n = 1; n <= 4; n++) {
9913 GemmMicrokernelTester()
9914 .mr(4)
9915 .nr(4)
9916 .kr(2)
9917 .sr(1)
9918 .m(4)
9919 .n(n)
9920 .k(8)
9921 .iterations(1)
9922 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9923 }
9924 }
9925
9926 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8) {
9927 TEST_REQUIRES_X86_XOP;
9928 for (size_t k = 1; k < 8; k++) {
9929 GemmMicrokernelTester()
9930 .mr(4)
9931 .nr(4)
9932 .kr(2)
9933 .sr(1)
9934 .m(4)
9935 .n(4)
9936 .k(k)
9937 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9938 }
9939 }
9940
9941 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8_subtile) {
9942 TEST_REQUIRES_X86_XOP;
9943 for (size_t k = 1; k < 8; k++) {
9944 for (uint32_t m = 1; m <= 4; m++) {
9945 for (uint32_t n = 1; n <= 4; n++) {
9946 GemmMicrokernelTester()
9947 .mr(4)
9948 .nr(4)
9949 .kr(2)
9950 .sr(1)
9951 .m(m)
9952 .n(n)
9953 .k(k)
9954 .iterations(1)
9955 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9956 }
9957 }
9958 }
9959 }
9960
9961 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8) {
9962 TEST_REQUIRES_X86_XOP;
9963 for (size_t k = 9; k < 16; k++) {
9964 GemmMicrokernelTester()
9965 .mr(4)
9966 .nr(4)
9967 .kr(2)
9968 .sr(1)
9969 .m(4)
9970 .n(4)
9971 .k(k)
9972 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9973 }
9974 }
9975
9976 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8_subtile) {
9977 TEST_REQUIRES_X86_XOP;
9978 for (size_t k = 9; k < 16; k++) {
9979 for (uint32_t m = 1; m <= 4; m++) {
9980 for (uint32_t n = 1; n <= 4; n++) {
9981 GemmMicrokernelTester()
9982 .mr(4)
9983 .nr(4)
9984 .kr(2)
9985 .sr(1)
9986 .m(m)
9987 .n(n)
9988 .k(k)
9989 .iterations(1)
9990 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
9991 }
9992 }
9993 }
9994 }
9995
9996 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8) {
9997 TEST_REQUIRES_X86_XOP;
9998 for (size_t k = 16; k <= 80; k += 8) {
9999 GemmMicrokernelTester()
10000 .mr(4)
10001 .nr(4)
10002 .kr(2)
10003 .sr(1)
10004 .m(4)
10005 .n(4)
10006 .k(k)
10007 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10008 }
10009 }
10010
10011 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8_subtile) {
10012 TEST_REQUIRES_X86_XOP;
10013 for (size_t k = 16; k <= 80; k += 8) {
10014 for (uint32_t m = 1; m <= 4; m++) {
10015 for (uint32_t n = 1; n <= 4; n++) {
10016 GemmMicrokernelTester()
10017 .mr(4)
10018 .nr(4)
10019 .kr(2)
10020 .sr(1)
10021 .m(m)
10022 .n(n)
10023 .k(k)
10024 .iterations(1)
10025 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10026 }
10027 }
10028 }
10029 }
10030
10031 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4) {
10032 TEST_REQUIRES_X86_XOP;
10033 for (uint32_t n = 5; n < 8; n++) {
10034 for (size_t k = 1; k <= 40; k += 9) {
10035 GemmMicrokernelTester()
10036 .mr(4)
10037 .nr(4)
10038 .kr(2)
10039 .sr(1)
10040 .m(4)
10041 .n(4)
10042 .k(k)
10043 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10044 }
10045 }
10046 }
10047
10048 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_strided_cn) {
10049 TEST_REQUIRES_X86_XOP;
10050 for (uint32_t n = 5; n < 8; n++) {
10051 for (size_t k = 1; k <= 40; k += 9) {
10052 GemmMicrokernelTester()
10053 .mr(4)
10054 .nr(4)
10055 .kr(2)
10056 .sr(1)
10057 .m(4)
10058 .n(4)
10059 .k(k)
10060 .cn_stride(7)
10061 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10062 }
10063 }
10064 }
10065
10066 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_subtile) {
10067 TEST_REQUIRES_X86_XOP;
10068 for (uint32_t n = 5; n < 8; n++) {
10069 for (size_t k = 1; k <= 40; k += 9) {
10070 for (uint32_t m = 1; m <= 4; m++) {
10071 GemmMicrokernelTester()
10072 .mr(4)
10073 .nr(4)
10074 .kr(2)
10075 .sr(1)
10076 .m(m)
10077 .n(n)
10078 .k(k)
10079 .iterations(1)
10080 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10081 }
10082 }
10083 }
10084 }
10085
10086 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4) {
10087 TEST_REQUIRES_X86_XOP;
10088 for (uint32_t n = 8; n <= 12; n += 4) {
10089 for (size_t k = 1; k <= 40; k += 9) {
10090 GemmMicrokernelTester()
10091 .mr(4)
10092 .nr(4)
10093 .kr(2)
10094 .sr(1)
10095 .m(4)
10096 .n(4)
10097 .k(k)
10098 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10099 }
10100 }
10101 }
10102
10103 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_strided_cn) {
10104 TEST_REQUIRES_X86_XOP;
10105 for (uint32_t n = 8; n <= 12; n += 4) {
10106 for (size_t k = 1; k <= 40; k += 9) {
10107 GemmMicrokernelTester()
10108 .mr(4)
10109 .nr(4)
10110 .kr(2)
10111 .sr(1)
10112 .m(4)
10113 .n(n)
10114 .k(k)
10115 .cn_stride(7)
10116 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10117 }
10118 }
10119 }
10120
10121 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_subtile) {
10122 TEST_REQUIRES_X86_XOP;
10123 for (uint32_t n = 8; n <= 12; n += 4) {
10124 for (size_t k = 1; k <= 40; k += 9) {
10125 for (uint32_t m = 1; m <= 4; m++) {
10126 GemmMicrokernelTester()
10127 .mr(4)
10128 .nr(4)
10129 .kr(2)
10130 .sr(1)
10131 .m(m)
10132 .n(n)
10133 .k(k)
10134 .iterations(1)
10135 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10136 }
10137 }
10138 }
10139 }
10140
10141 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, small_kernel) {
10142 TEST_REQUIRES_X86_XOP;
10143 for (size_t k = 1; k <= 40; k += 9) {
10144 GemmMicrokernelTester()
10145 .mr(4)
10146 .nr(4)
10147 .kr(2)
10148 .sr(1)
10149 .m(4)
10150 .n(4)
10151 .k(k)
10152 .ks(3)
10153 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10154 }
10155 }
10156
10157 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, small_kernel_subtile) {
10158 TEST_REQUIRES_X86_XOP;
10159 for (size_t k = 1; k <= 40; k += 9) {
10160 for (uint32_t m = 1; m <= 4; m++) {
10161 for (uint32_t n = 1; n <= 4; n++) {
10162 GemmMicrokernelTester()
10163 .mr(4)
10164 .nr(4)
10165 .kr(2)
10166 .sr(1)
10167 .m(m)
10168 .n(n)
10169 .k(k)
10170 .ks(3)
10171 .iterations(1)
10172 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10173 }
10174 }
10175 }
10176 }
10177
10178 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_small_kernel) {
10179 TEST_REQUIRES_X86_XOP;
10180 for (uint32_t n = 5; n < 8; n++) {
10181 for (size_t k = 1; k <= 40; k += 9) {
10182 GemmMicrokernelTester()
10183 .mr(4)
10184 .nr(4)
10185 .kr(2)
10186 .sr(1)
10187 .m(4)
10188 .n(4)
10189 .k(k)
10190 .ks(3)
10191 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10192 }
10193 }
10194 }
10195
10196 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_small_kernel) {
10197 TEST_REQUIRES_X86_XOP;
10198 for (uint32_t n = 8; n <= 12; n += 4) {
10199 for (size_t k = 1; k <= 40; k += 9) {
10200 GemmMicrokernelTester()
10201 .mr(4)
10202 .nr(4)
10203 .kr(2)
10204 .sr(1)
10205 .m(4)
10206 .n(4)
10207 .k(k)
10208 .ks(3)
10209 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10210 }
10211 }
10212 }
10213
10214 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cm_subtile) {
10215 TEST_REQUIRES_X86_XOP;
10216 for (size_t k = 1; k <= 40; k += 9) {
10217 for (uint32_t m = 1; m <= 4; m++) {
10218 for (uint32_t n = 1; n <= 4; n++) {
10219 GemmMicrokernelTester()
10220 .mr(4)
10221 .nr(4)
10222 .kr(2)
10223 .sr(1)
10224 .m(m)
10225 .n(n)
10226 .k(k)
10227 .cm_stride(7)
10228 .iterations(1)
10229 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10230 }
10231 }
10232 }
10233 }
10234
10235 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, a_offset) {
10236 TEST_REQUIRES_X86_XOP;
10237 for (size_t k = 1; k <= 40; k += 9) {
10238 GemmMicrokernelTester()
10239 .mr(4)
10240 .nr(4)
10241 .kr(2)
10242 .sr(1)
10243 .m(4)
10244 .n(4)
10245 .k(k)
10246 .ks(3)
10247 .a_offset(163)
10248 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10249 }
10250 }
10251
10252 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, zero) {
10253 TEST_REQUIRES_X86_XOP;
10254 for (uint32_t mz = 0; mz < 4; mz++) {
10255 for (size_t k = 1; k <= 40; k += 9) {
10256 GemmMicrokernelTester()
10257 .mr(4)
10258 .nr(4)
10259 .kr(2)
10260 .sr(1)
10261 .m(4)
10262 .n(4)
10263 .k(k)
10264 .ks(3)
10265 .a_offset(163)
10266 .zero_index(mz)
10267 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10268 }
10269 }
10270 }
10271
10272 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, qmin) {
10273 TEST_REQUIRES_X86_XOP;
10274 GemmMicrokernelTester()
10275 .mr(4)
10276 .nr(4)
10277 .kr(2)
10278 .sr(1)
10279 .m(4)
10280 .n(4)
10281 .k(8)
10282 .qmin(128)
10283 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10284 }
10285
10286 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, qmax) {
10287 TEST_REQUIRES_X86_XOP;
10288 GemmMicrokernelTester()
10289 .mr(4)
10290 .nr(4)
10291 .kr(2)
10292 .sr(1)
10293 .m(4)
10294 .n(4)
10295 .k(8)
10296 .qmax(128)
10297 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10298 }
10299
10300 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cm) {
10301 TEST_REQUIRES_X86_XOP;
10302 GemmMicrokernelTester()
10303 .mr(4)
10304 .nr(4)
10305 .kr(2)
10306 .sr(1)
10307 .m(4)
10308 .n(4)
10309 .k(8)
10310 .cm_stride(7)
10311 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10312 }
10313
10314 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, no_a_zero_point) {
10315 TEST_REQUIRES_X86_XOP;
10316 for (size_t k = 1; k <= 40; k += 9) {
10317 GemmMicrokernelTester()
10318 .mr(4)
10319 .nr(4)
10320 .kr(2)
10321 .sr(1)
10322 .m(4)
10323 .n(4)
10324 .k(k)
10325 .a_zero_point(0)
10326 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10327 }
10328 }
10329
10330 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, no_b_zero_point) {
10331 TEST_REQUIRES_X86_XOP;
10332 for (size_t k = 1; k <= 40; k += 9) {
10333 GemmMicrokernelTester()
10334 .mr(4)
10335 .nr(4)
10336 .kr(2)
10337 .sr(1)
10338 .m(4)
10339 .n(4)
10340 .k(k)
10341 .b_zero_point(0)
10342 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10343 }
10344 }
10345
10346 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, no_zero_point) {
10347 TEST_REQUIRES_X86_XOP;
10348 for (size_t k = 1; k <= 40; k += 9) {
10349 GemmMicrokernelTester()
10350 .mr(4)
10351 .nr(4)
10352 .kr(2)
10353 .sr(1)
10354 .m(4)
10355 .n(4)
10356 .k(k)
10357 .a_zero_point(0)
10358 .b_zero_point(0)
10359 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10360 }
10361 }
10362#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10363
10364
10365#if XNN_ARCH_X86 || XNN_ARCH_X86_64
10366 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8) {
10367 TEST_REQUIRES_X86_SSE2;
10368 GemmMicrokernelTester()
10369 .mr(1)
10370 .nr(4)
10371 .kr(2)
10372 .sr(1)
10373 .m(1)
10374 .n(4)
10375 .k(8)
10376 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10377 }
10378
10379 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cn) {
10380 TEST_REQUIRES_X86_SSE2;
10381 GemmMicrokernelTester()
10382 .mr(1)
10383 .nr(4)
10384 .kr(2)
10385 .sr(1)
10386 .m(1)
10387 .n(4)
10388 .k(8)
10389 .cn_stride(7)
10390 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10391 }
10392
10393 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile) {
10394 TEST_REQUIRES_X86_SSE2;
10395 for (uint32_t m = 1; m <= 1; m++) {
10396 for (uint32_t n = 1; n <= 4; n++) {
10397 GemmMicrokernelTester()
10398 .mr(1)
10399 .nr(4)
10400 .kr(2)
10401 .sr(1)
10402 .m(m)
10403 .n(n)
10404 .k(8)
10405 .iterations(1)
10406 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10407 }
10408 }
10409 }
10410
10411 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile_m) {
10412 TEST_REQUIRES_X86_SSE2;
10413 for (uint32_t m = 1; m <= 1; m++) {
10414 GemmMicrokernelTester()
10415 .mr(1)
10416 .nr(4)
10417 .kr(2)
10418 .sr(1)
10419 .m(m)
10420 .n(4)
10421 .k(8)
10422 .iterations(1)
10423 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10424 }
10425 }
10426
10427 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile_n) {
10428 TEST_REQUIRES_X86_SSE2;
10429 for (uint32_t n = 1; n <= 4; n++) {
10430 GemmMicrokernelTester()
10431 .mr(1)
10432 .nr(4)
10433 .kr(2)
10434 .sr(1)
10435 .m(1)
10436 .n(n)
10437 .k(8)
10438 .iterations(1)
10439 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10440 }
10441 }
10442
10443 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_lt_8) {
10444 TEST_REQUIRES_X86_SSE2;
10445 for (size_t k = 1; k < 8; k++) {
10446 GemmMicrokernelTester()
10447 .mr(1)
10448 .nr(4)
10449 .kr(2)
10450 .sr(1)
10451 .m(1)
10452 .n(4)
10453 .k(k)
10454 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10455 }
10456 }
10457
10458 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_lt_8_subtile) {
10459 TEST_REQUIRES_X86_SSE2;
10460 for (size_t k = 1; k < 8; k++) {
10461 for (uint32_t m = 1; m <= 1; m++) {
10462 for (uint32_t n = 1; n <= 4; n++) {
10463 GemmMicrokernelTester()
10464 .mr(1)
10465 .nr(4)
10466 .kr(2)
10467 .sr(1)
10468 .m(m)
10469 .n(n)
10470 .k(k)
10471 .iterations(1)
10472 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10473 }
10474 }
10475 }
10476 }
10477
10478 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_gt_8) {
10479 TEST_REQUIRES_X86_SSE2;
10480 for (size_t k = 9; k < 16; k++) {
10481 GemmMicrokernelTester()
10482 .mr(1)
10483 .nr(4)
10484 .kr(2)
10485 .sr(1)
10486 .m(1)
10487 .n(4)
10488 .k(k)
10489 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10490 }
10491 }
10492
10493 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_gt_8_subtile) {
10494 TEST_REQUIRES_X86_SSE2;
10495 for (size_t k = 9; k < 16; k++) {
10496 for (uint32_t m = 1; m <= 1; m++) {
10497 for (uint32_t n = 1; n <= 4; n++) {
10498 GemmMicrokernelTester()
10499 .mr(1)
10500 .nr(4)
10501 .kr(2)
10502 .sr(1)
10503 .m(m)
10504 .n(n)
10505 .k(k)
10506 .iterations(1)
10507 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10508 }
10509 }
10510 }
10511 }
10512
10513 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_div_8) {
10514 TEST_REQUIRES_X86_SSE2;
10515 for (size_t k = 16; k <= 80; k += 8) {
10516 GemmMicrokernelTester()
10517 .mr(1)
10518 .nr(4)
10519 .kr(2)
10520 .sr(1)
10521 .m(1)
10522 .n(4)
10523 .k(k)
10524 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10525 }
10526 }
10527
10528 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_div_8_subtile) {
10529 TEST_REQUIRES_X86_SSE2;
10530 for (size_t k = 16; k <= 80; k += 8) {
10531 for (uint32_t m = 1; m <= 1; m++) {
10532 for (uint32_t n = 1; n <= 4; n++) {
10533 GemmMicrokernelTester()
10534 .mr(1)
10535 .nr(4)
10536 .kr(2)
10537 .sr(1)
10538 .m(m)
10539 .n(n)
10540 .k(k)
10541 .iterations(1)
10542 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10543 }
10544 }
10545 }
10546 }
10547
10548 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4) {
10549 TEST_REQUIRES_X86_SSE2;
10550 for (uint32_t n = 5; n < 8; n++) {
10551 for (size_t k = 1; k <= 40; k += 9) {
10552 GemmMicrokernelTester()
10553 .mr(1)
10554 .nr(4)
10555 .kr(2)
10556 .sr(1)
10557 .m(1)
10558 .n(4)
10559 .k(k)
10560 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10561 }
10562 }
10563 }
10564
10565 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_strided_cn) {
10566 TEST_REQUIRES_X86_SSE2;
10567 for (uint32_t n = 5; n < 8; n++) {
10568 for (size_t k = 1; k <= 40; k += 9) {
10569 GemmMicrokernelTester()
10570 .mr(1)
10571 .nr(4)
10572 .kr(2)
10573 .sr(1)
10574 .m(1)
10575 .n(4)
10576 .k(k)
10577 .cn_stride(7)
10578 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10579 }
10580 }
10581 }
10582
10583 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_subtile) {
10584 TEST_REQUIRES_X86_SSE2;
10585 for (uint32_t n = 5; n < 8; n++) {
10586 for (size_t k = 1; k <= 40; k += 9) {
10587 for (uint32_t m = 1; m <= 1; m++) {
10588 GemmMicrokernelTester()
10589 .mr(1)
10590 .nr(4)
10591 .kr(2)
10592 .sr(1)
10593 .m(m)
10594 .n(n)
10595 .k(k)
10596 .iterations(1)
10597 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10598 }
10599 }
10600 }
10601 }
10602
10603 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4) {
10604 TEST_REQUIRES_X86_SSE2;
10605 for (uint32_t n = 8; n <= 12; n += 4) {
10606 for (size_t k = 1; k <= 40; k += 9) {
10607 GemmMicrokernelTester()
10608 .mr(1)
10609 .nr(4)
10610 .kr(2)
10611 .sr(1)
10612 .m(1)
10613 .n(4)
10614 .k(k)
10615 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10616 }
10617 }
10618 }
10619
10620 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_strided_cn) {
10621 TEST_REQUIRES_X86_SSE2;
10622 for (uint32_t n = 8; n <= 12; n += 4) {
10623 for (size_t k = 1; k <= 40; k += 9) {
10624 GemmMicrokernelTester()
10625 .mr(1)
10626 .nr(4)
10627 .kr(2)
10628 .sr(1)
10629 .m(1)
10630 .n(n)
10631 .k(k)
10632 .cn_stride(7)
10633 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10634 }
10635 }
10636 }
10637
10638 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_subtile) {
10639 TEST_REQUIRES_X86_SSE2;
10640 for (uint32_t n = 8; n <= 12; n += 4) {
10641 for (size_t k = 1; k <= 40; k += 9) {
10642 for (uint32_t m = 1; m <= 1; m++) {
10643 GemmMicrokernelTester()
10644 .mr(1)
10645 .nr(4)
10646 .kr(2)
10647 .sr(1)
10648 .m(m)
10649 .n(n)
10650 .k(k)
10651 .iterations(1)
10652 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10653 }
10654 }
10655 }
10656 }
10657
10658 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, small_kernel) {
10659 TEST_REQUIRES_X86_SSE2;
10660 for (size_t k = 1; k <= 40; k += 9) {
10661 GemmMicrokernelTester()
10662 .mr(1)
10663 .nr(4)
10664 .kr(2)
10665 .sr(1)
10666 .m(1)
10667 .n(4)
10668 .k(k)
10669 .ks(3)
10670 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10671 }
10672 }
10673
10674 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, small_kernel_subtile) {
10675 TEST_REQUIRES_X86_SSE2;
10676 for (size_t k = 1; k <= 40; k += 9) {
10677 for (uint32_t m = 1; m <= 1; m++) {
10678 for (uint32_t n = 1; n <= 4; n++) {
10679 GemmMicrokernelTester()
10680 .mr(1)
10681 .nr(4)
10682 .kr(2)
10683 .sr(1)
10684 .m(m)
10685 .n(n)
10686 .k(k)
10687 .ks(3)
10688 .iterations(1)
10689 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10690 }
10691 }
10692 }
10693 }
10694
10695 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_small_kernel) {
10696 TEST_REQUIRES_X86_SSE2;
10697 for (uint32_t n = 5; n < 8; n++) {
10698 for (size_t k = 1; k <= 40; k += 9) {
10699 GemmMicrokernelTester()
10700 .mr(1)
10701 .nr(4)
10702 .kr(2)
10703 .sr(1)
10704 .m(1)
10705 .n(4)
10706 .k(k)
10707 .ks(3)
10708 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10709 }
10710 }
10711 }
10712
10713 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_small_kernel) {
10714 TEST_REQUIRES_X86_SSE2;
10715 for (uint32_t n = 8; n <= 12; n += 4) {
10716 for (size_t k = 1; k <= 40; k += 9) {
10717 GemmMicrokernelTester()
10718 .mr(1)
10719 .nr(4)
10720 .kr(2)
10721 .sr(1)
10722 .m(1)
10723 .n(4)
10724 .k(k)
10725 .ks(3)
10726 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10727 }
10728 }
10729 }
10730
10731 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cm_subtile) {
10732 TEST_REQUIRES_X86_SSE2;
10733 for (size_t k = 1; k <= 40; k += 9) {
10734 for (uint32_t m = 1; m <= 1; m++) {
10735 for (uint32_t n = 1; n <= 4; n++) {
10736 GemmMicrokernelTester()
10737 .mr(1)
10738 .nr(4)
10739 .kr(2)
10740 .sr(1)
10741 .m(m)
10742 .n(n)
10743 .k(k)
10744 .cm_stride(7)
10745 .iterations(1)
10746 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10747 }
10748 }
10749 }
10750 }
10751
10752 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, a_offset) {
10753 TEST_REQUIRES_X86_SSE2;
10754 for (size_t k = 1; k <= 40; k += 9) {
10755 GemmMicrokernelTester()
10756 .mr(1)
10757 .nr(4)
10758 .kr(2)
10759 .sr(1)
10760 .m(1)
10761 .n(4)
10762 .k(k)
10763 .ks(3)
10764 .a_offset(43)
10765 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10766 }
10767 }
10768
10769 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, zero) {
10770 TEST_REQUIRES_X86_SSE2;
10771 for (uint32_t mz = 0; mz < 1; mz++) {
10772 for (size_t k = 1; k <= 40; k += 9) {
10773 GemmMicrokernelTester()
10774 .mr(1)
10775 .nr(4)
10776 .kr(2)
10777 .sr(1)
10778 .m(1)
10779 .n(4)
10780 .k(k)
10781 .ks(3)
10782 .a_offset(43)
10783 .zero_index(mz)
10784 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10785 }
10786 }
10787 }
10788
10789 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, qmin) {
10790 TEST_REQUIRES_X86_SSE2;
10791 GemmMicrokernelTester()
10792 .mr(1)
10793 .nr(4)
10794 .kr(2)
10795 .sr(1)
10796 .m(1)
10797 .n(4)
10798 .k(8)
10799 .qmin(128)
10800 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10801 }
10802
10803 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, qmax) {
10804 TEST_REQUIRES_X86_SSE2;
10805 GemmMicrokernelTester()
10806 .mr(1)
10807 .nr(4)
10808 .kr(2)
10809 .sr(1)
10810 .m(1)
10811 .n(4)
10812 .k(8)
10813 .qmax(128)
10814 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10815 }
10816
10817 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cm) {
10818 TEST_REQUIRES_X86_SSE2;
10819 GemmMicrokernelTester()
10820 .mr(1)
10821 .nr(4)
10822 .kr(2)
10823 .sr(1)
10824 .m(1)
10825 .n(4)
10826 .k(8)
10827 .cm_stride(7)
10828 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10829 }
10830
10831 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, no_a_zero_point) {
10832 TEST_REQUIRES_X86_SSE2;
10833 for (size_t k = 1; k <= 40; k += 9) {
10834 GemmMicrokernelTester()
10835 .mr(1)
10836 .nr(4)
10837 .kr(2)
10838 .sr(1)
10839 .m(1)
10840 .n(4)
10841 .k(k)
10842 .a_zero_point(0)
10843 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10844 }
10845 }
10846
10847 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, no_b_zero_point) {
10848 TEST_REQUIRES_X86_SSE2;
10849 for (size_t k = 1; k <= 40; k += 9) {
10850 GemmMicrokernelTester()
10851 .mr(1)
10852 .nr(4)
10853 .kr(2)
10854 .sr(1)
10855 .m(1)
10856 .n(4)
10857 .k(k)
10858 .b_zero_point(0)
10859 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10860 }
10861 }
10862
10863 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, no_zero_point) {
10864 TEST_REQUIRES_X86_SSE2;
10865 for (size_t k = 1; k <= 40; k += 9) {
10866 GemmMicrokernelTester()
10867 .mr(1)
10868 .nr(4)
10869 .kr(2)
10870 .sr(1)
10871 .m(1)
10872 .n(4)
10873 .k(k)
10874 .a_zero_point(0)
10875 .b_zero_point(0)
10876 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10877 }
10878 }
10879#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10880
10881
10882#if XNN_ARCH_X86 || XNN_ARCH_X86_64
10883 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8) {
10884 TEST_REQUIRES_X86_SSE2;
10885 GemmMicrokernelTester()
10886 .mr(2)
10887 .nr(4)
10888 .kr(2)
10889 .sr(1)
10890 .m(2)
10891 .n(4)
10892 .k(8)
10893 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10894 }
10895
10896 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cn) {
10897 TEST_REQUIRES_X86_SSE2;
10898 GemmMicrokernelTester()
10899 .mr(2)
10900 .nr(4)
10901 .kr(2)
10902 .sr(1)
10903 .m(2)
10904 .n(4)
10905 .k(8)
10906 .cn_stride(7)
10907 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10908 }
10909
10910 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile) {
10911 TEST_REQUIRES_X86_SSE2;
10912 for (uint32_t m = 1; m <= 2; m++) {
10913 for (uint32_t n = 1; n <= 4; n++) {
10914 GemmMicrokernelTester()
10915 .mr(2)
10916 .nr(4)
10917 .kr(2)
10918 .sr(1)
10919 .m(m)
10920 .n(n)
10921 .k(8)
10922 .iterations(1)
10923 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10924 }
10925 }
10926 }
10927
10928 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile_m) {
10929 TEST_REQUIRES_X86_SSE2;
10930 for (uint32_t m = 1; m <= 2; m++) {
10931 GemmMicrokernelTester()
10932 .mr(2)
10933 .nr(4)
10934 .kr(2)
10935 .sr(1)
10936 .m(m)
10937 .n(4)
10938 .k(8)
10939 .iterations(1)
10940 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10941 }
10942 }
10943
10944 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile_n) {
10945 TEST_REQUIRES_X86_SSE2;
10946 for (uint32_t n = 1; n <= 4; n++) {
10947 GemmMicrokernelTester()
10948 .mr(2)
10949 .nr(4)
10950 .kr(2)
10951 .sr(1)
10952 .m(2)
10953 .n(n)
10954 .k(8)
10955 .iterations(1)
10956 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10957 }
10958 }
10959
10960 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_lt_8) {
10961 TEST_REQUIRES_X86_SSE2;
10962 for (size_t k = 1; k < 8; k++) {
10963 GemmMicrokernelTester()
10964 .mr(2)
10965 .nr(4)
10966 .kr(2)
10967 .sr(1)
10968 .m(2)
10969 .n(4)
10970 .k(k)
10971 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10972 }
10973 }
10974
10975 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_lt_8_subtile) {
10976 TEST_REQUIRES_X86_SSE2;
10977 for (size_t k = 1; k < 8; k++) {
10978 for (uint32_t m = 1; m <= 2; m++) {
10979 for (uint32_t n = 1; n <= 4; n++) {
10980 GemmMicrokernelTester()
10981 .mr(2)
10982 .nr(4)
10983 .kr(2)
10984 .sr(1)
10985 .m(m)
10986 .n(n)
10987 .k(k)
10988 .iterations(1)
10989 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
10990 }
10991 }
10992 }
10993 }
10994
10995 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_gt_8) {
10996 TEST_REQUIRES_X86_SSE2;
10997 for (size_t k = 9; k < 16; k++) {
10998 GemmMicrokernelTester()
10999 .mr(2)
11000 .nr(4)
11001 .kr(2)
11002 .sr(1)
11003 .m(2)
11004 .n(4)
11005 .k(k)
11006 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11007 }
11008 }
11009
11010 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_gt_8_subtile) {
11011 TEST_REQUIRES_X86_SSE2;
11012 for (size_t k = 9; k < 16; k++) {
11013 for (uint32_t m = 1; m <= 2; m++) {
11014 for (uint32_t n = 1; n <= 4; n++) {
11015 GemmMicrokernelTester()
11016 .mr(2)
11017 .nr(4)
11018 .kr(2)
11019 .sr(1)
11020 .m(m)
11021 .n(n)
11022 .k(k)
11023 .iterations(1)
11024 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11025 }
11026 }
11027 }
11028 }
11029
11030 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_div_8) {
11031 TEST_REQUIRES_X86_SSE2;
11032 for (size_t k = 16; k <= 80; k += 8) {
11033 GemmMicrokernelTester()
11034 .mr(2)
11035 .nr(4)
11036 .kr(2)
11037 .sr(1)
11038 .m(2)
11039 .n(4)
11040 .k(k)
11041 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11042 }
11043 }
11044
11045 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_div_8_subtile) {
11046 TEST_REQUIRES_X86_SSE2;
11047 for (size_t k = 16; k <= 80; k += 8) {
11048 for (uint32_t m = 1; m <= 2; m++) {
11049 for (uint32_t n = 1; n <= 4; n++) {
11050 GemmMicrokernelTester()
11051 .mr(2)
11052 .nr(4)
11053 .kr(2)
11054 .sr(1)
11055 .m(m)
11056 .n(n)
11057 .k(k)
11058 .iterations(1)
11059 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11060 }
11061 }
11062 }
11063 }
11064
11065 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4) {
11066 TEST_REQUIRES_X86_SSE2;
11067 for (uint32_t n = 5; n < 8; n++) {
11068 for (size_t k = 1; k <= 40; k += 9) {
11069 GemmMicrokernelTester()
11070 .mr(2)
11071 .nr(4)
11072 .kr(2)
11073 .sr(1)
11074 .m(2)
11075 .n(4)
11076 .k(k)
11077 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11078 }
11079 }
11080 }
11081
11082 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_strided_cn) {
11083 TEST_REQUIRES_X86_SSE2;
11084 for (uint32_t n = 5; n < 8; n++) {
11085 for (size_t k = 1; k <= 40; k += 9) {
11086 GemmMicrokernelTester()
11087 .mr(2)
11088 .nr(4)
11089 .kr(2)
11090 .sr(1)
11091 .m(2)
11092 .n(4)
11093 .k(k)
11094 .cn_stride(7)
11095 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11096 }
11097 }
11098 }
11099
11100 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_subtile) {
11101 TEST_REQUIRES_X86_SSE2;
11102 for (uint32_t n = 5; n < 8; n++) {
11103 for (size_t k = 1; k <= 40; k += 9) {
11104 for (uint32_t m = 1; m <= 2; m++) {
11105 GemmMicrokernelTester()
11106 .mr(2)
11107 .nr(4)
11108 .kr(2)
11109 .sr(1)
11110 .m(m)
11111 .n(n)
11112 .k(k)
11113 .iterations(1)
11114 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11115 }
11116 }
11117 }
11118 }
11119
11120 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4) {
11121 TEST_REQUIRES_X86_SSE2;
11122 for (uint32_t n = 8; n <= 12; n += 4) {
11123 for (size_t k = 1; k <= 40; k += 9) {
11124 GemmMicrokernelTester()
11125 .mr(2)
11126 .nr(4)
11127 .kr(2)
11128 .sr(1)
11129 .m(2)
11130 .n(4)
11131 .k(k)
11132 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11133 }
11134 }
11135 }
11136
11137 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_strided_cn) {
11138 TEST_REQUIRES_X86_SSE2;
11139 for (uint32_t n = 8; n <= 12; n += 4) {
11140 for (size_t k = 1; k <= 40; k += 9) {
11141 GemmMicrokernelTester()
11142 .mr(2)
11143 .nr(4)
11144 .kr(2)
11145 .sr(1)
11146 .m(2)
11147 .n(n)
11148 .k(k)
11149 .cn_stride(7)
11150 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11151 }
11152 }
11153 }
11154
11155 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_subtile) {
11156 TEST_REQUIRES_X86_SSE2;
11157 for (uint32_t n = 8; n <= 12; n += 4) {
11158 for (size_t k = 1; k <= 40; k += 9) {
11159 for (uint32_t m = 1; m <= 2; m++) {
11160 GemmMicrokernelTester()
11161 .mr(2)
11162 .nr(4)
11163 .kr(2)
11164 .sr(1)
11165 .m(m)
11166 .n(n)
11167 .k(k)
11168 .iterations(1)
11169 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11170 }
11171 }
11172 }
11173 }
11174
11175 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, small_kernel) {
11176 TEST_REQUIRES_X86_SSE2;
11177 for (size_t k = 1; k <= 40; k += 9) {
11178 GemmMicrokernelTester()
11179 .mr(2)
11180 .nr(4)
11181 .kr(2)
11182 .sr(1)
11183 .m(2)
11184 .n(4)
11185 .k(k)
11186 .ks(3)
11187 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11188 }
11189 }
11190
11191 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, small_kernel_subtile) {
11192 TEST_REQUIRES_X86_SSE2;
11193 for (size_t k = 1; k <= 40; k += 9) {
11194 for (uint32_t m = 1; m <= 2; m++) {
11195 for (uint32_t n = 1; n <= 4; n++) {
11196 GemmMicrokernelTester()
11197 .mr(2)
11198 .nr(4)
11199 .kr(2)
11200 .sr(1)
11201 .m(m)
11202 .n(n)
11203 .k(k)
11204 .ks(3)
11205 .iterations(1)
11206 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11207 }
11208 }
11209 }
11210 }
11211
11212 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_small_kernel) {
11213 TEST_REQUIRES_X86_SSE2;
11214 for (uint32_t n = 5; n < 8; n++) {
11215 for (size_t k = 1; k <= 40; k += 9) {
11216 GemmMicrokernelTester()
11217 .mr(2)
11218 .nr(4)
11219 .kr(2)
11220 .sr(1)
11221 .m(2)
11222 .n(4)
11223 .k(k)
11224 .ks(3)
11225 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11226 }
11227 }
11228 }
11229
11230 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_small_kernel) {
11231 TEST_REQUIRES_X86_SSE2;
11232 for (uint32_t n = 8; n <= 12; n += 4) {
11233 for (size_t k = 1; k <= 40; k += 9) {
11234 GemmMicrokernelTester()
11235 .mr(2)
11236 .nr(4)
11237 .kr(2)
11238 .sr(1)
11239 .m(2)
11240 .n(4)
11241 .k(k)
11242 .ks(3)
11243 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11244 }
11245 }
11246 }
11247
11248 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cm_subtile) {
11249 TEST_REQUIRES_X86_SSE2;
11250 for (size_t k = 1; k <= 40; k += 9) {
11251 for (uint32_t m = 1; m <= 2; m++) {
11252 for (uint32_t n = 1; n <= 4; n++) {
11253 GemmMicrokernelTester()
11254 .mr(2)
11255 .nr(4)
11256 .kr(2)
11257 .sr(1)
11258 .m(m)
11259 .n(n)
11260 .k(k)
11261 .cm_stride(7)
11262 .iterations(1)
11263 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11264 }
11265 }
11266 }
11267 }
11268
11269 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, a_offset) {
11270 TEST_REQUIRES_X86_SSE2;
11271 for (size_t k = 1; k <= 40; k += 9) {
11272 GemmMicrokernelTester()
11273 .mr(2)
11274 .nr(4)
11275 .kr(2)
11276 .sr(1)
11277 .m(2)
11278 .n(4)
11279 .k(k)
11280 .ks(3)
11281 .a_offset(83)
11282 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11283 }
11284 }
11285
11286 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, zero) {
11287 TEST_REQUIRES_X86_SSE2;
11288 for (uint32_t mz = 0; mz < 2; mz++) {
11289 for (size_t k = 1; k <= 40; k += 9) {
11290 GemmMicrokernelTester()
11291 .mr(2)
11292 .nr(4)
11293 .kr(2)
11294 .sr(1)
11295 .m(2)
11296 .n(4)
11297 .k(k)
11298 .ks(3)
11299 .a_offset(83)
11300 .zero_index(mz)
11301 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11302 }
11303 }
11304 }
11305
11306 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, qmin) {
11307 TEST_REQUIRES_X86_SSE2;
11308 GemmMicrokernelTester()
11309 .mr(2)
11310 .nr(4)
11311 .kr(2)
11312 .sr(1)
11313 .m(2)
11314 .n(4)
11315 .k(8)
11316 .qmin(128)
11317 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11318 }
11319
11320 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, qmax) {
11321 TEST_REQUIRES_X86_SSE2;
11322 GemmMicrokernelTester()
11323 .mr(2)
11324 .nr(4)
11325 .kr(2)
11326 .sr(1)
11327 .m(2)
11328 .n(4)
11329 .k(8)
11330 .qmax(128)
11331 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11332 }
11333
11334 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cm) {
11335 TEST_REQUIRES_X86_SSE2;
11336 GemmMicrokernelTester()
11337 .mr(2)
11338 .nr(4)
11339 .kr(2)
11340 .sr(1)
11341 .m(2)
11342 .n(4)
11343 .k(8)
11344 .cm_stride(7)
11345 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11346 }
11347
11348 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, no_a_zero_point) {
11349 TEST_REQUIRES_X86_SSE2;
11350 for (size_t k = 1; k <= 40; k += 9) {
11351 GemmMicrokernelTester()
11352 .mr(2)
11353 .nr(4)
11354 .kr(2)
11355 .sr(1)
11356 .m(2)
11357 .n(4)
11358 .k(k)
11359 .a_zero_point(0)
11360 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11361 }
11362 }
11363
11364 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, no_b_zero_point) {
11365 TEST_REQUIRES_X86_SSE2;
11366 for (size_t k = 1; k <= 40; k += 9) {
11367 GemmMicrokernelTester()
11368 .mr(2)
11369 .nr(4)
11370 .kr(2)
11371 .sr(1)
11372 .m(2)
11373 .n(4)
11374 .k(k)
11375 .b_zero_point(0)
11376 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11377 }
11378 }
11379
11380 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, no_zero_point) {
11381 TEST_REQUIRES_X86_SSE2;
11382 for (size_t k = 1; k <= 40; k += 9) {
11383 GemmMicrokernelTester()
11384 .mr(2)
11385 .nr(4)
11386 .kr(2)
11387 .sr(1)
11388 .m(2)
11389 .n(4)
11390 .k(k)
11391 .a_zero_point(0)
11392 .b_zero_point(0)
11393 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11394 }
11395 }
11396#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
11397
11398
11399#if XNN_ARCH_X86 || XNN_ARCH_X86_64
11400 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8) {
11401 TEST_REQUIRES_X86_SSE2;
11402 GemmMicrokernelTester()
11403 .mr(3)
11404 .nr(4)
11405 .kr(2)
11406 .sr(1)
11407 .m(3)
11408 .n(4)
11409 .k(8)
11410 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11411 }
11412
11413 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, strided_cn) {
11414 TEST_REQUIRES_X86_SSE2;
11415 GemmMicrokernelTester()
11416 .mr(3)
11417 .nr(4)
11418 .kr(2)
11419 .sr(1)
11420 .m(3)
11421 .n(4)
11422 .k(8)
11423 .cn_stride(7)
11424 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11425 }
11426
11427 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_subtile) {
11428 TEST_REQUIRES_X86_SSE2;
11429 for (uint32_t m = 1; m <= 3; m++) {
11430 for (uint32_t n = 1; n <= 4; n++) {
11431 GemmMicrokernelTester()
11432 .mr(3)
11433 .nr(4)
11434 .kr(2)
11435 .sr(1)
11436 .m(m)
11437 .n(n)
11438 .k(8)
11439 .iterations(1)
11440 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11441 }
11442 }
11443 }
11444
11445 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_subtile_m) {
11446 TEST_REQUIRES_X86_SSE2;
11447 for (uint32_t m = 1; m <= 3; m++) {
11448 GemmMicrokernelTester()
11449 .mr(3)
11450 .nr(4)
11451 .kr(2)
11452 .sr(1)
11453 .m(m)
11454 .n(4)
11455 .k(8)
11456 .iterations(1)
11457 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11458 }
11459 }
11460
11461 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_subtile_n) {
11462 TEST_REQUIRES_X86_SSE2;
11463 for (uint32_t n = 1; n <= 4; n++) {
11464 GemmMicrokernelTester()
11465 .mr(3)
11466 .nr(4)
11467 .kr(2)
11468 .sr(1)
11469 .m(3)
11470 .n(n)
11471 .k(8)
11472 .iterations(1)
11473 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11474 }
11475 }
11476
11477 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_lt_8) {
11478 TEST_REQUIRES_X86_SSE2;
11479 for (size_t k = 1; k < 8; k++) {
11480 GemmMicrokernelTester()
11481 .mr(3)
11482 .nr(4)
11483 .kr(2)
11484 .sr(1)
11485 .m(3)
11486 .n(4)
11487 .k(k)
11488 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11489 }
11490 }
11491
11492 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_lt_8_subtile) {
11493 TEST_REQUIRES_X86_SSE2;
11494 for (size_t k = 1; k < 8; k++) {
11495 for (uint32_t m = 1; m <= 3; m++) {
11496 for (uint32_t n = 1; n <= 4; n++) {
11497 GemmMicrokernelTester()
11498 .mr(3)
11499 .nr(4)
11500 .kr(2)
11501 .sr(1)
11502 .m(m)
11503 .n(n)
11504 .k(k)
11505 .iterations(1)
11506 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11507 }
11508 }
11509 }
11510 }
11511
11512 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_gt_8) {
11513 TEST_REQUIRES_X86_SSE2;
11514 for (size_t k = 9; k < 16; k++) {
11515 GemmMicrokernelTester()
11516 .mr(3)
11517 .nr(4)
11518 .kr(2)
11519 .sr(1)
11520 .m(3)
11521 .n(4)
11522 .k(k)
11523 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11524 }
11525 }
11526
11527 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_gt_8_subtile) {
11528 TEST_REQUIRES_X86_SSE2;
11529 for (size_t k = 9; k < 16; k++) {
11530 for (uint32_t m = 1; m <= 3; m++) {
11531 for (uint32_t n = 1; n <= 4; n++) {
11532 GemmMicrokernelTester()
11533 .mr(3)
11534 .nr(4)
11535 .kr(2)
11536 .sr(1)
11537 .m(m)
11538 .n(n)
11539 .k(k)
11540 .iterations(1)
11541 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11542 }
11543 }
11544 }
11545 }
11546
11547 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_div_8) {
11548 TEST_REQUIRES_X86_SSE2;
11549 for (size_t k = 16; k <= 80; k += 8) {
11550 GemmMicrokernelTester()
11551 .mr(3)
11552 .nr(4)
11553 .kr(2)
11554 .sr(1)
11555 .m(3)
11556 .n(4)
11557 .k(k)
11558 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11559 }
11560 }
11561
11562 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_div_8_subtile) {
11563 TEST_REQUIRES_X86_SSE2;
11564 for (size_t k = 16; k <= 80; k += 8) {
11565 for (uint32_t m = 1; m <= 3; m++) {
11566 for (uint32_t n = 1; n <= 4; n++) {
11567 GemmMicrokernelTester()
11568 .mr(3)
11569 .nr(4)
11570 .kr(2)
11571 .sr(1)
11572 .m(m)
11573 .n(n)
11574 .k(k)
11575 .iterations(1)
11576 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11577 }
11578 }
11579 }
11580 }
11581
11582 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4) {
11583 TEST_REQUIRES_X86_SSE2;
11584 for (uint32_t n = 5; n < 8; n++) {
11585 for (size_t k = 1; k <= 40; k += 9) {
11586 GemmMicrokernelTester()
11587 .mr(3)
11588 .nr(4)
11589 .kr(2)
11590 .sr(1)
11591 .m(3)
11592 .n(4)
11593 .k(k)
11594 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11595 }
11596 }
11597 }
11598
11599 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4_strided_cn) {
11600 TEST_REQUIRES_X86_SSE2;
11601 for (uint32_t n = 5; n < 8; n++) {
11602 for (size_t k = 1; k <= 40; k += 9) {
11603 GemmMicrokernelTester()
11604 .mr(3)
11605 .nr(4)
11606 .kr(2)
11607 .sr(1)
11608 .m(3)
11609 .n(4)
11610 .k(k)
11611 .cn_stride(7)
11612 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11613 }
11614 }
11615 }
11616
11617 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4_subtile) {
11618 TEST_REQUIRES_X86_SSE2;
11619 for (uint32_t n = 5; n < 8; n++) {
11620 for (size_t k = 1; k <= 40; k += 9) {
11621 for (uint32_t m = 1; m <= 3; m++) {
11622 GemmMicrokernelTester()
11623 .mr(3)
11624 .nr(4)
11625 .kr(2)
11626 .sr(1)
11627 .m(m)
11628 .n(n)
11629 .k(k)
11630 .iterations(1)
11631 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11632 }
11633 }
11634 }
11635 }
11636
11637 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4) {
11638 TEST_REQUIRES_X86_SSE2;
11639 for (uint32_t n = 8; n <= 12; n += 4) {
11640 for (size_t k = 1; k <= 40; k += 9) {
11641 GemmMicrokernelTester()
11642 .mr(3)
11643 .nr(4)
11644 .kr(2)
11645 .sr(1)
11646 .m(3)
11647 .n(4)
11648 .k(k)
11649 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11650 }
11651 }
11652 }
11653
11654 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4_strided_cn) {
11655 TEST_REQUIRES_X86_SSE2;
11656 for (uint32_t n = 8; n <= 12; n += 4) {
11657 for (size_t k = 1; k <= 40; k += 9) {
11658 GemmMicrokernelTester()
11659 .mr(3)
11660 .nr(4)
11661 .kr(2)
11662 .sr(1)
11663 .m(3)
11664 .n(n)
11665 .k(k)
11666 .cn_stride(7)
11667 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11668 }
11669 }
11670 }
11671
11672 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4_subtile) {
11673 TEST_REQUIRES_X86_SSE2;
11674 for (uint32_t n = 8; n <= 12; n += 4) {
11675 for (size_t k = 1; k <= 40; k += 9) {
11676 for (uint32_t m = 1; m <= 3; m++) {
11677 GemmMicrokernelTester()
11678 .mr(3)
11679 .nr(4)
11680 .kr(2)
11681 .sr(1)
11682 .m(m)
11683 .n(n)
11684 .k(k)
11685 .iterations(1)
11686 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11687 }
11688 }
11689 }
11690 }
11691
11692 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, small_kernel) {
11693 TEST_REQUIRES_X86_SSE2;
11694 for (size_t k = 1; k <= 40; k += 9) {
11695 GemmMicrokernelTester()
11696 .mr(3)
11697 .nr(4)
11698 .kr(2)
11699 .sr(1)
11700 .m(3)
11701 .n(4)
11702 .k(k)
11703 .ks(3)
11704 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11705 }
11706 }
11707
11708 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, small_kernel_subtile) {
11709 TEST_REQUIRES_X86_SSE2;
11710 for (size_t k = 1; k <= 40; k += 9) {
11711 for (uint32_t m = 1; m <= 3; m++) {
11712 for (uint32_t n = 1; n <= 4; n++) {
11713 GemmMicrokernelTester()
11714 .mr(3)
11715 .nr(4)
11716 .kr(2)
11717 .sr(1)
11718 .m(m)
11719 .n(n)
11720 .k(k)
11721 .ks(3)
11722 .iterations(1)
11723 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11724 }
11725 }
11726 }
11727 }
11728
11729 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4_small_kernel) {
11730 TEST_REQUIRES_X86_SSE2;
11731 for (uint32_t n = 5; n < 8; n++) {
11732 for (size_t k = 1; k <= 40; k += 9) {
11733 GemmMicrokernelTester()
11734 .mr(3)
11735 .nr(4)
11736 .kr(2)
11737 .sr(1)
11738 .m(3)
11739 .n(4)
11740 .k(k)
11741 .ks(3)
11742 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11743 }
11744 }
11745 }
11746
11747 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4_small_kernel) {
11748 TEST_REQUIRES_X86_SSE2;
11749 for (uint32_t n = 8; n <= 12; n += 4) {
11750 for (size_t k = 1; k <= 40; k += 9) {
11751 GemmMicrokernelTester()
11752 .mr(3)
11753 .nr(4)
11754 .kr(2)
11755 .sr(1)
11756 .m(3)
11757 .n(4)
11758 .k(k)
11759 .ks(3)
11760 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11761 }
11762 }
11763 }
11764
11765 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, strided_cm_subtile) {
11766 TEST_REQUIRES_X86_SSE2;
11767 for (size_t k = 1; k <= 40; k += 9) {
11768 for (uint32_t m = 1; m <= 3; m++) {
11769 for (uint32_t n = 1; n <= 4; n++) {
11770 GemmMicrokernelTester()
11771 .mr(3)
11772 .nr(4)
11773 .kr(2)
11774 .sr(1)
11775 .m(m)
11776 .n(n)
11777 .k(k)
11778 .cm_stride(7)
11779 .iterations(1)
11780 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11781 }
11782 }
11783 }
11784 }
11785
11786 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, a_offset) {
11787 TEST_REQUIRES_X86_SSE2;
11788 for (size_t k = 1; k <= 40; k += 9) {
11789 GemmMicrokernelTester()
11790 .mr(3)
11791 .nr(4)
11792 .kr(2)
11793 .sr(1)
11794 .m(3)
11795 .n(4)
11796 .k(k)
11797 .ks(3)
11798 .a_offset(127)
11799 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11800 }
11801 }
11802
11803 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, zero) {
11804 TEST_REQUIRES_X86_SSE2;
11805 for (uint32_t mz = 0; mz < 3; mz++) {
11806 for (size_t k = 1; k <= 40; k += 9) {
11807 GemmMicrokernelTester()
11808 .mr(3)
11809 .nr(4)
11810 .kr(2)
11811 .sr(1)
11812 .m(3)
11813 .n(4)
11814 .k(k)
11815 .ks(3)
11816 .a_offset(127)
11817 .zero_index(mz)
11818 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11819 }
11820 }
11821 }
11822
11823 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, qmin) {
11824 TEST_REQUIRES_X86_SSE2;
11825 GemmMicrokernelTester()
11826 .mr(3)
11827 .nr(4)
11828 .kr(2)
11829 .sr(1)
11830 .m(3)
11831 .n(4)
11832 .k(8)
11833 .qmin(128)
11834 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11835 }
11836
11837 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, qmax) {
11838 TEST_REQUIRES_X86_SSE2;
11839 GemmMicrokernelTester()
11840 .mr(3)
11841 .nr(4)
11842 .kr(2)
11843 .sr(1)
11844 .m(3)
11845 .n(4)
11846 .k(8)
11847 .qmax(128)
11848 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11849 }
11850
11851 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, strided_cm) {
11852 TEST_REQUIRES_X86_SSE2;
11853 GemmMicrokernelTester()
11854 .mr(3)
11855 .nr(4)
11856 .kr(2)
11857 .sr(1)
11858 .m(3)
11859 .n(4)
11860 .k(8)
11861 .cm_stride(7)
11862 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11863 }
11864
11865 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, no_a_zero_point) {
11866 TEST_REQUIRES_X86_SSE2;
11867 for (size_t k = 1; k <= 40; k += 9) {
11868 GemmMicrokernelTester()
11869 .mr(3)
11870 .nr(4)
11871 .kr(2)
11872 .sr(1)
11873 .m(3)
11874 .n(4)
11875 .k(k)
11876 .a_zero_point(0)
11877 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11878 }
11879 }
11880
11881 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, no_b_zero_point) {
11882 TEST_REQUIRES_X86_SSE2;
11883 for (size_t k = 1; k <= 40; k += 9) {
11884 GemmMicrokernelTester()
11885 .mr(3)
11886 .nr(4)
11887 .kr(2)
11888 .sr(1)
11889 .m(3)
11890 .n(4)
11891 .k(k)
11892 .b_zero_point(0)
11893 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11894 }
11895 }
11896
11897 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, no_zero_point) {
11898 TEST_REQUIRES_X86_SSE2;
11899 for (size_t k = 1; k <= 40; k += 9) {
11900 GemmMicrokernelTester()
11901 .mr(3)
11902 .nr(4)
11903 .kr(2)
11904 .sr(1)
11905 .m(3)
11906 .n(4)
11907 .k(k)
11908 .a_zero_point(0)
11909 .b_zero_point(0)
11910 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11911 }
11912 }
11913#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
11914
11915
11916#if XNN_ARCH_X86 || XNN_ARCH_X86_64
11917 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8) {
11918 TEST_REQUIRES_X86_SSE2;
11919 GemmMicrokernelTester()
11920 .mr(4)
11921 .nr(4)
11922 .kr(2)
11923 .sr(1)
11924 .m(4)
11925 .n(4)
11926 .k(8)
11927 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11928 }
11929
11930 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cn) {
11931 TEST_REQUIRES_X86_SSE2;
11932 GemmMicrokernelTester()
11933 .mr(4)
11934 .nr(4)
11935 .kr(2)
11936 .sr(1)
11937 .m(4)
11938 .n(4)
11939 .k(8)
11940 .cn_stride(7)
11941 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11942 }
11943
11944 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile) {
11945 TEST_REQUIRES_X86_SSE2;
11946 for (uint32_t m = 1; m <= 4; m++) {
11947 for (uint32_t n = 1; n <= 4; n++) {
11948 GemmMicrokernelTester()
11949 .mr(4)
11950 .nr(4)
11951 .kr(2)
11952 .sr(1)
11953 .m(m)
11954 .n(n)
11955 .k(8)
11956 .iterations(1)
11957 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11958 }
11959 }
11960 }
11961
11962 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile_m) {
11963 TEST_REQUIRES_X86_SSE2;
11964 for (uint32_t m = 1; m <= 4; m++) {
11965 GemmMicrokernelTester()
11966 .mr(4)
11967 .nr(4)
11968 .kr(2)
11969 .sr(1)
11970 .m(m)
11971 .n(4)
11972 .k(8)
11973 .iterations(1)
11974 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11975 }
11976 }
11977
11978 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile_n) {
11979 TEST_REQUIRES_X86_SSE2;
11980 for (uint32_t n = 1; n <= 4; n++) {
11981 GemmMicrokernelTester()
11982 .mr(4)
11983 .nr(4)
11984 .kr(2)
11985 .sr(1)
11986 .m(4)
11987 .n(n)
11988 .k(8)
11989 .iterations(1)
11990 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
11991 }
11992 }
11993
11994 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_lt_8) {
11995 TEST_REQUIRES_X86_SSE2;
11996 for (size_t k = 1; k < 8; k++) {
11997 GemmMicrokernelTester()
11998 .mr(4)
11999 .nr(4)
12000 .kr(2)
12001 .sr(1)
12002 .m(4)
12003 .n(4)
12004 .k(k)
12005 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12006 }
12007 }
12008
12009 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_lt_8_subtile) {
12010 TEST_REQUIRES_X86_SSE2;
12011 for (size_t k = 1; k < 8; k++) {
12012 for (uint32_t m = 1; m <= 4; m++) {
12013 for (uint32_t n = 1; n <= 4; n++) {
12014 GemmMicrokernelTester()
12015 .mr(4)
12016 .nr(4)
12017 .kr(2)
12018 .sr(1)
12019 .m(m)
12020 .n(n)
12021 .k(k)
12022 .iterations(1)
12023 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12024 }
12025 }
12026 }
12027 }
12028
12029 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_gt_8) {
12030 TEST_REQUIRES_X86_SSE2;
12031 for (size_t k = 9; k < 16; k++) {
12032 GemmMicrokernelTester()
12033 .mr(4)
12034 .nr(4)
12035 .kr(2)
12036 .sr(1)
12037 .m(4)
12038 .n(4)
12039 .k(k)
12040 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12041 }
12042 }
12043
12044 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_gt_8_subtile) {
12045 TEST_REQUIRES_X86_SSE2;
12046 for (size_t k = 9; k < 16; k++) {
12047 for (uint32_t m = 1; m <= 4; m++) {
12048 for (uint32_t n = 1; n <= 4; n++) {
12049 GemmMicrokernelTester()
12050 .mr(4)
12051 .nr(4)
12052 .kr(2)
12053 .sr(1)
12054 .m(m)
12055 .n(n)
12056 .k(k)
12057 .iterations(1)
12058 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12059 }
12060 }
12061 }
12062 }
12063
12064 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_div_8) {
12065 TEST_REQUIRES_X86_SSE2;
12066 for (size_t k = 16; k <= 80; k += 8) {
12067 GemmMicrokernelTester()
12068 .mr(4)
12069 .nr(4)
12070 .kr(2)
12071 .sr(1)
12072 .m(4)
12073 .n(4)
12074 .k(k)
12075 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12076 }
12077 }
12078
12079 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_div_8_subtile) {
12080 TEST_REQUIRES_X86_SSE2;
12081 for (size_t k = 16; k <= 80; k += 8) {
12082 for (uint32_t m = 1; m <= 4; m++) {
12083 for (uint32_t n = 1; n <= 4; n++) {
12084 GemmMicrokernelTester()
12085 .mr(4)
12086 .nr(4)
12087 .kr(2)
12088 .sr(1)
12089 .m(m)
12090 .n(n)
12091 .k(k)
12092 .iterations(1)
12093 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12094 }
12095 }
12096 }
12097 }
12098
12099 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4) {
12100 TEST_REQUIRES_X86_SSE2;
12101 for (uint32_t n = 5; n < 8; n++) {
12102 for (size_t k = 1; k <= 40; k += 9) {
12103 GemmMicrokernelTester()
12104 .mr(4)
12105 .nr(4)
12106 .kr(2)
12107 .sr(1)
12108 .m(4)
12109 .n(4)
12110 .k(k)
12111 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12112 }
12113 }
12114 }
12115
12116 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_strided_cn) {
12117 TEST_REQUIRES_X86_SSE2;
12118 for (uint32_t n = 5; n < 8; n++) {
12119 for (size_t k = 1; k <= 40; k += 9) {
12120 GemmMicrokernelTester()
12121 .mr(4)
12122 .nr(4)
12123 .kr(2)
12124 .sr(1)
12125 .m(4)
12126 .n(4)
12127 .k(k)
12128 .cn_stride(7)
12129 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12130 }
12131 }
12132 }
12133
12134 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_subtile) {
12135 TEST_REQUIRES_X86_SSE2;
12136 for (uint32_t n = 5; n < 8; n++) {
12137 for (size_t k = 1; k <= 40; k += 9) {
12138 for (uint32_t m = 1; m <= 4; m++) {
12139 GemmMicrokernelTester()
12140 .mr(4)
12141 .nr(4)
12142 .kr(2)
12143 .sr(1)
12144 .m(m)
12145 .n(n)
12146 .k(k)
12147 .iterations(1)
12148 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12149 }
12150 }
12151 }
12152 }
12153
12154 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4) {
12155 TEST_REQUIRES_X86_SSE2;
12156 for (uint32_t n = 8; n <= 12; n += 4) {
12157 for (size_t k = 1; k <= 40; k += 9) {
12158 GemmMicrokernelTester()
12159 .mr(4)
12160 .nr(4)
12161 .kr(2)
12162 .sr(1)
12163 .m(4)
12164 .n(4)
12165 .k(k)
12166 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12167 }
12168 }
12169 }
12170
12171 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_strided_cn) {
12172 TEST_REQUIRES_X86_SSE2;
12173 for (uint32_t n = 8; n <= 12; n += 4) {
12174 for (size_t k = 1; k <= 40; k += 9) {
12175 GemmMicrokernelTester()
12176 .mr(4)
12177 .nr(4)
12178 .kr(2)
12179 .sr(1)
12180 .m(4)
12181 .n(n)
12182 .k(k)
12183 .cn_stride(7)
12184 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12185 }
12186 }
12187 }
12188
12189 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_subtile) {
12190 TEST_REQUIRES_X86_SSE2;
12191 for (uint32_t n = 8; n <= 12; n += 4) {
12192 for (size_t k = 1; k <= 40; k += 9) {
12193 for (uint32_t m = 1; m <= 4; m++) {
12194 GemmMicrokernelTester()
12195 .mr(4)
12196 .nr(4)
12197 .kr(2)
12198 .sr(1)
12199 .m(m)
12200 .n(n)
12201 .k(k)
12202 .iterations(1)
12203 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12204 }
12205 }
12206 }
12207 }
12208
12209 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, small_kernel) {
12210 TEST_REQUIRES_X86_SSE2;
12211 for (size_t k = 1; k <= 40; k += 9) {
12212 GemmMicrokernelTester()
12213 .mr(4)
12214 .nr(4)
12215 .kr(2)
12216 .sr(1)
12217 .m(4)
12218 .n(4)
12219 .k(k)
12220 .ks(3)
12221 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12222 }
12223 }
12224
12225 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, small_kernel_subtile) {
12226 TEST_REQUIRES_X86_SSE2;
12227 for (size_t k = 1; k <= 40; k += 9) {
12228 for (uint32_t m = 1; m <= 4; m++) {
12229 for (uint32_t n = 1; n <= 4; n++) {
12230 GemmMicrokernelTester()
12231 .mr(4)
12232 .nr(4)
12233 .kr(2)
12234 .sr(1)
12235 .m(m)
12236 .n(n)
12237 .k(k)
12238 .ks(3)
12239 .iterations(1)
12240 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12241 }
12242 }
12243 }
12244 }
12245
12246 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_small_kernel) {
12247 TEST_REQUIRES_X86_SSE2;
12248 for (uint32_t n = 5; n < 8; n++) {
12249 for (size_t k = 1; k <= 40; k += 9) {
12250 GemmMicrokernelTester()
12251 .mr(4)
12252 .nr(4)
12253 .kr(2)
12254 .sr(1)
12255 .m(4)
12256 .n(4)
12257 .k(k)
12258 .ks(3)
12259 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12260 }
12261 }
12262 }
12263
12264 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_small_kernel) {
12265 TEST_REQUIRES_X86_SSE2;
12266 for (uint32_t n = 8; n <= 12; n += 4) {
12267 for (size_t k = 1; k <= 40; k += 9) {
12268 GemmMicrokernelTester()
12269 .mr(4)
12270 .nr(4)
12271 .kr(2)
12272 .sr(1)
12273 .m(4)
12274 .n(4)
12275 .k(k)
12276 .ks(3)
12277 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12278 }
12279 }
12280 }
12281
12282 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cm_subtile) {
12283 TEST_REQUIRES_X86_SSE2;
12284 for (size_t k = 1; k <= 40; k += 9) {
12285 for (uint32_t m = 1; m <= 4; m++) {
12286 for (uint32_t n = 1; n <= 4; n++) {
12287 GemmMicrokernelTester()
12288 .mr(4)
12289 .nr(4)
12290 .kr(2)
12291 .sr(1)
12292 .m(m)
12293 .n(n)
12294 .k(k)
12295 .cm_stride(7)
12296 .iterations(1)
12297 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12298 }
12299 }
12300 }
12301 }
12302
12303 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, a_offset) {
12304 TEST_REQUIRES_X86_SSE2;
12305 for (size_t k = 1; k <= 40; k += 9) {
12306 GemmMicrokernelTester()
12307 .mr(4)
12308 .nr(4)
12309 .kr(2)
12310 .sr(1)
12311 .m(4)
12312 .n(4)
12313 .k(k)
12314 .ks(3)
12315 .a_offset(163)
12316 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12317 }
12318 }
12319
12320 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, zero) {
12321 TEST_REQUIRES_X86_SSE2;
12322 for (uint32_t mz = 0; mz < 4; mz++) {
12323 for (size_t k = 1; k <= 40; k += 9) {
12324 GemmMicrokernelTester()
12325 .mr(4)
12326 .nr(4)
12327 .kr(2)
12328 .sr(1)
12329 .m(4)
12330 .n(4)
12331 .k(k)
12332 .ks(3)
12333 .a_offset(163)
12334 .zero_index(mz)
12335 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12336 }
12337 }
12338 }
12339
12340 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, qmin) {
12341 TEST_REQUIRES_X86_SSE2;
12342 GemmMicrokernelTester()
12343 .mr(4)
12344 .nr(4)
12345 .kr(2)
12346 .sr(1)
12347 .m(4)
12348 .n(4)
12349 .k(8)
12350 .qmin(128)
12351 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12352 }
12353
12354 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, qmax) {
12355 TEST_REQUIRES_X86_SSE2;
12356 GemmMicrokernelTester()
12357 .mr(4)
12358 .nr(4)
12359 .kr(2)
12360 .sr(1)
12361 .m(4)
12362 .n(4)
12363 .k(8)
12364 .qmax(128)
12365 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12366 }
12367
12368 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cm) {
12369 TEST_REQUIRES_X86_SSE2;
12370 GemmMicrokernelTester()
12371 .mr(4)
12372 .nr(4)
12373 .kr(2)
12374 .sr(1)
12375 .m(4)
12376 .n(4)
12377 .k(8)
12378 .cm_stride(7)
12379 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12380 }
12381
12382 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, no_a_zero_point) {
12383 TEST_REQUIRES_X86_SSE2;
12384 for (size_t k = 1; k <= 40; k += 9) {
12385 GemmMicrokernelTester()
12386 .mr(4)
12387 .nr(4)
12388 .kr(2)
12389 .sr(1)
12390 .m(4)
12391 .n(4)
12392 .k(k)
12393 .a_zero_point(0)
12394 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12395 }
12396 }
12397
12398 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, no_b_zero_point) {
12399 TEST_REQUIRES_X86_SSE2;
12400 for (size_t k = 1; k <= 40; k += 9) {
12401 GemmMicrokernelTester()
12402 .mr(4)
12403 .nr(4)
12404 .kr(2)
12405 .sr(1)
12406 .m(4)
12407 .n(4)
12408 .k(k)
12409 .b_zero_point(0)
12410 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12411 }
12412 }
12413
12414 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, no_zero_point) {
12415 TEST_REQUIRES_X86_SSE2;
12416 for (size_t k = 1; k <= 40; k += 9) {
12417 GemmMicrokernelTester()
12418 .mr(4)
12419 .nr(4)
12420 .kr(2)
12421 .sr(1)
12422 .m(4)
12423 .n(4)
12424 .k(k)
12425 .a_zero_point(0)
12426 .b_zero_point(0)
12427 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12428 }
12429 }
12430#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12431
12432
12433#if XNN_ARCH_X86 || XNN_ARCH_X86_64
12434 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8) {
12435 TEST_REQUIRES_X86_SSE41;
12436 GemmMicrokernelTester()
12437 .mr(1)
12438 .nr(4)
12439 .kr(2)
12440 .sr(1)
12441 .m(1)
12442 .n(4)
12443 .k(8)
12444 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12445 }
12446
12447 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, strided_cn) {
12448 TEST_REQUIRES_X86_SSE41;
12449 GemmMicrokernelTester()
12450 .mr(1)
12451 .nr(4)
12452 .kr(2)
12453 .sr(1)
12454 .m(1)
12455 .n(4)
12456 .k(8)
12457 .cn_stride(7)
12458 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12459 }
12460
12461 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8_subtile) {
12462 TEST_REQUIRES_X86_SSE41;
12463 for (uint32_t m = 1; m <= 1; m++) {
12464 for (uint32_t n = 1; n <= 4; n++) {
12465 GemmMicrokernelTester()
12466 .mr(1)
12467 .nr(4)
12468 .kr(2)
12469 .sr(1)
12470 .m(m)
12471 .n(n)
12472 .k(8)
12473 .iterations(1)
12474 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12475 }
12476 }
12477 }
12478
12479 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8_subtile_m) {
12480 TEST_REQUIRES_X86_SSE41;
12481 for (uint32_t m = 1; m <= 1; m++) {
12482 GemmMicrokernelTester()
12483 .mr(1)
12484 .nr(4)
12485 .kr(2)
12486 .sr(1)
12487 .m(m)
12488 .n(4)
12489 .k(8)
12490 .iterations(1)
12491 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12492 }
12493 }
12494
12495 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8_subtile_n) {
12496 TEST_REQUIRES_X86_SSE41;
12497 for (uint32_t n = 1; n <= 4; n++) {
12498 GemmMicrokernelTester()
12499 .mr(1)
12500 .nr(4)
12501 .kr(2)
12502 .sr(1)
12503 .m(1)
12504 .n(n)
12505 .k(8)
12506 .iterations(1)
12507 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12508 }
12509 }
12510
12511 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_lt_8) {
12512 TEST_REQUIRES_X86_SSE41;
12513 for (size_t k = 1; k < 8; k++) {
12514 GemmMicrokernelTester()
12515 .mr(1)
12516 .nr(4)
12517 .kr(2)
12518 .sr(1)
12519 .m(1)
12520 .n(4)
12521 .k(k)
12522 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12523 }
12524 }
12525
12526 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_lt_8_subtile) {
12527 TEST_REQUIRES_X86_SSE41;
12528 for (size_t k = 1; k < 8; k++) {
12529 for (uint32_t m = 1; m <= 1; m++) {
12530 for (uint32_t n = 1; n <= 4; n++) {
12531 GemmMicrokernelTester()
12532 .mr(1)
12533 .nr(4)
12534 .kr(2)
12535 .sr(1)
12536 .m(m)
12537 .n(n)
12538 .k(k)
12539 .iterations(1)
12540 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12541 }
12542 }
12543 }
12544 }
12545
12546 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_gt_8) {
12547 TEST_REQUIRES_X86_SSE41;
12548 for (size_t k = 9; k < 16; k++) {
12549 GemmMicrokernelTester()
12550 .mr(1)
12551 .nr(4)
12552 .kr(2)
12553 .sr(1)
12554 .m(1)
12555 .n(4)
12556 .k(k)
12557 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12558 }
12559 }
12560
12561 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_gt_8_subtile) {
12562 TEST_REQUIRES_X86_SSE41;
12563 for (size_t k = 9; k < 16; k++) {
12564 for (uint32_t m = 1; m <= 1; m++) {
12565 for (uint32_t n = 1; n <= 4; n++) {
12566 GemmMicrokernelTester()
12567 .mr(1)
12568 .nr(4)
12569 .kr(2)
12570 .sr(1)
12571 .m(m)
12572 .n(n)
12573 .k(k)
12574 .iterations(1)
12575 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12576 }
12577 }
12578 }
12579 }
12580
12581 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_div_8) {
12582 TEST_REQUIRES_X86_SSE41;
12583 for (size_t k = 16; k <= 80; k += 8) {
12584 GemmMicrokernelTester()
12585 .mr(1)
12586 .nr(4)
12587 .kr(2)
12588 .sr(1)
12589 .m(1)
12590 .n(4)
12591 .k(k)
12592 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12593 }
12594 }
12595
12596 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_div_8_subtile) {
12597 TEST_REQUIRES_X86_SSE41;
12598 for (size_t k = 16; k <= 80; k += 8) {
12599 for (uint32_t m = 1; m <= 1; m++) {
12600 for (uint32_t n = 1; n <= 4; n++) {
12601 GemmMicrokernelTester()
12602 .mr(1)
12603 .nr(4)
12604 .kr(2)
12605 .sr(1)
12606 .m(m)
12607 .n(n)
12608 .k(k)
12609 .iterations(1)
12610 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12611 }
12612 }
12613 }
12614 }
12615
12616 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4) {
12617 TEST_REQUIRES_X86_SSE41;
12618 for (uint32_t n = 5; n < 8; n++) {
12619 for (size_t k = 1; k <= 40; k += 9) {
12620 GemmMicrokernelTester()
12621 .mr(1)
12622 .nr(4)
12623 .kr(2)
12624 .sr(1)
12625 .m(1)
12626 .n(4)
12627 .k(k)
12628 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12629 }
12630 }
12631 }
12632
12633 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4_strided_cn) {
12634 TEST_REQUIRES_X86_SSE41;
12635 for (uint32_t n = 5; n < 8; n++) {
12636 for (size_t k = 1; k <= 40; k += 9) {
12637 GemmMicrokernelTester()
12638 .mr(1)
12639 .nr(4)
12640 .kr(2)
12641 .sr(1)
12642 .m(1)
12643 .n(4)
12644 .k(k)
12645 .cn_stride(7)
12646 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12647 }
12648 }
12649 }
12650
12651 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4_subtile) {
12652 TEST_REQUIRES_X86_SSE41;
12653 for (uint32_t n = 5; n < 8; n++) {
12654 for (size_t k = 1; k <= 40; k += 9) {
12655 for (uint32_t m = 1; m <= 1; m++) {
12656 GemmMicrokernelTester()
12657 .mr(1)
12658 .nr(4)
12659 .kr(2)
12660 .sr(1)
12661 .m(m)
12662 .n(n)
12663 .k(k)
12664 .iterations(1)
12665 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12666 }
12667 }
12668 }
12669 }
12670
12671 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4) {
12672 TEST_REQUIRES_X86_SSE41;
12673 for (uint32_t n = 8; n <= 12; n += 4) {
12674 for (size_t k = 1; k <= 40; k += 9) {
12675 GemmMicrokernelTester()
12676 .mr(1)
12677 .nr(4)
12678 .kr(2)
12679 .sr(1)
12680 .m(1)
12681 .n(4)
12682 .k(k)
12683 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12684 }
12685 }
12686 }
12687
12688 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4_strided_cn) {
12689 TEST_REQUIRES_X86_SSE41;
12690 for (uint32_t n = 8; n <= 12; n += 4) {
12691 for (size_t k = 1; k <= 40; k += 9) {
12692 GemmMicrokernelTester()
12693 .mr(1)
12694 .nr(4)
12695 .kr(2)
12696 .sr(1)
12697 .m(1)
12698 .n(n)
12699 .k(k)
12700 .cn_stride(7)
12701 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12702 }
12703 }
12704 }
12705
12706 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4_subtile) {
12707 TEST_REQUIRES_X86_SSE41;
12708 for (uint32_t n = 8; n <= 12; n += 4) {
12709 for (size_t k = 1; k <= 40; k += 9) {
12710 for (uint32_t m = 1; m <= 1; m++) {
12711 GemmMicrokernelTester()
12712 .mr(1)
12713 .nr(4)
12714 .kr(2)
12715 .sr(1)
12716 .m(m)
12717 .n(n)
12718 .k(k)
12719 .iterations(1)
12720 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12721 }
12722 }
12723 }
12724 }
12725
12726 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, small_kernel) {
12727 TEST_REQUIRES_X86_SSE41;
12728 for (size_t k = 1; k <= 40; k += 9) {
12729 GemmMicrokernelTester()
12730 .mr(1)
12731 .nr(4)
12732 .kr(2)
12733 .sr(1)
12734 .m(1)
12735 .n(4)
12736 .k(k)
12737 .ks(3)
12738 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12739 }
12740 }
12741
12742 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, small_kernel_subtile) {
12743 TEST_REQUIRES_X86_SSE41;
12744 for (size_t k = 1; k <= 40; k += 9) {
12745 for (uint32_t m = 1; m <= 1; m++) {
12746 for (uint32_t n = 1; n <= 4; n++) {
12747 GemmMicrokernelTester()
12748 .mr(1)
12749 .nr(4)
12750 .kr(2)
12751 .sr(1)
12752 .m(m)
12753 .n(n)
12754 .k(k)
12755 .ks(3)
12756 .iterations(1)
12757 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12758 }
12759 }
12760 }
12761 }
12762
12763 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4_small_kernel) {
12764 TEST_REQUIRES_X86_SSE41;
12765 for (uint32_t n = 5; n < 8; n++) {
12766 for (size_t k = 1; k <= 40; k += 9) {
12767 GemmMicrokernelTester()
12768 .mr(1)
12769 .nr(4)
12770 .kr(2)
12771 .sr(1)
12772 .m(1)
12773 .n(4)
12774 .k(k)
12775 .ks(3)
12776 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12777 }
12778 }
12779 }
12780
12781 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4_small_kernel) {
12782 TEST_REQUIRES_X86_SSE41;
12783 for (uint32_t n = 8; n <= 12; n += 4) {
12784 for (size_t k = 1; k <= 40; k += 9) {
12785 GemmMicrokernelTester()
12786 .mr(1)
12787 .nr(4)
12788 .kr(2)
12789 .sr(1)
12790 .m(1)
12791 .n(4)
12792 .k(k)
12793 .ks(3)
12794 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12795 }
12796 }
12797 }
12798
12799 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, strided_cm_subtile) {
12800 TEST_REQUIRES_X86_SSE41;
12801 for (size_t k = 1; k <= 40; k += 9) {
12802 for (uint32_t m = 1; m <= 1; m++) {
12803 for (uint32_t n = 1; n <= 4; n++) {
12804 GemmMicrokernelTester()
12805 .mr(1)
12806 .nr(4)
12807 .kr(2)
12808 .sr(1)
12809 .m(m)
12810 .n(n)
12811 .k(k)
12812 .cm_stride(7)
12813 .iterations(1)
12814 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12815 }
12816 }
12817 }
12818 }
12819
12820 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, a_offset) {
12821 TEST_REQUIRES_X86_SSE41;
12822 for (size_t k = 1; k <= 40; k += 9) {
12823 GemmMicrokernelTester()
12824 .mr(1)
12825 .nr(4)
12826 .kr(2)
12827 .sr(1)
12828 .m(1)
12829 .n(4)
12830 .k(k)
12831 .ks(3)
12832 .a_offset(43)
12833 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12834 }
12835 }
12836
12837 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, zero) {
12838 TEST_REQUIRES_X86_SSE41;
12839 for (uint32_t mz = 0; mz < 1; mz++) {
12840 for (size_t k = 1; k <= 40; k += 9) {
12841 GemmMicrokernelTester()
12842 .mr(1)
12843 .nr(4)
12844 .kr(2)
12845 .sr(1)
12846 .m(1)
12847 .n(4)
12848 .k(k)
12849 .ks(3)
12850 .a_offset(43)
12851 .zero_index(mz)
12852 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12853 }
12854 }
12855 }
12856
12857 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, qmin) {
12858 TEST_REQUIRES_X86_SSE41;
12859 GemmMicrokernelTester()
12860 .mr(1)
12861 .nr(4)
12862 .kr(2)
12863 .sr(1)
12864 .m(1)
12865 .n(4)
12866 .k(8)
12867 .qmin(128)
12868 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12869 }
12870
12871 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, qmax) {
12872 TEST_REQUIRES_X86_SSE41;
12873 GemmMicrokernelTester()
12874 .mr(1)
12875 .nr(4)
12876 .kr(2)
12877 .sr(1)
12878 .m(1)
12879 .n(4)
12880 .k(8)
12881 .qmax(128)
12882 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12883 }
12884
12885 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, strided_cm) {
12886 TEST_REQUIRES_X86_SSE41;
12887 GemmMicrokernelTester()
12888 .mr(1)
12889 .nr(4)
12890 .kr(2)
12891 .sr(1)
12892 .m(1)
12893 .n(4)
12894 .k(8)
12895 .cm_stride(7)
12896 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12897 }
12898
12899 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, no_a_zero_point) {
12900 TEST_REQUIRES_X86_SSE41;
12901 for (size_t k = 1; k <= 40; k += 9) {
12902 GemmMicrokernelTester()
12903 .mr(1)
12904 .nr(4)
12905 .kr(2)
12906 .sr(1)
12907 .m(1)
12908 .n(4)
12909 .k(k)
12910 .a_zero_point(0)
12911 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12912 }
12913 }
12914
12915 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, no_b_zero_point) {
12916 TEST_REQUIRES_X86_SSE41;
12917 for (size_t k = 1; k <= 40; k += 9) {
12918 GemmMicrokernelTester()
12919 .mr(1)
12920 .nr(4)
12921 .kr(2)
12922 .sr(1)
12923 .m(1)
12924 .n(4)
12925 .k(k)
12926 .b_zero_point(0)
12927 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12928 }
12929 }
12930
12931 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, no_zero_point) {
12932 TEST_REQUIRES_X86_SSE41;
12933 for (size_t k = 1; k <= 40; k += 9) {
12934 GemmMicrokernelTester()
12935 .mr(1)
12936 .nr(4)
12937 .kr(2)
12938 .sr(1)
12939 .m(1)
12940 .n(4)
12941 .k(k)
12942 .a_zero_point(0)
12943 .b_zero_point(0)
12944 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12945 }
12946 }
12947#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12948
12949
12950#if XNN_ARCH_X86 || XNN_ARCH_X86_64
12951 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8) {
12952 TEST_REQUIRES_X86_SSE41;
12953 GemmMicrokernelTester()
12954 .mr(2)
12955 .nr(4)
12956 .kr(2)
12957 .sr(1)
12958 .m(2)
12959 .n(4)
12960 .k(8)
12961 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12962 }
12963
12964 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, strided_cn) {
12965 TEST_REQUIRES_X86_SSE41;
12966 GemmMicrokernelTester()
12967 .mr(2)
12968 .nr(4)
12969 .kr(2)
12970 .sr(1)
12971 .m(2)
12972 .n(4)
12973 .k(8)
12974 .cn_stride(7)
12975 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12976 }
12977
12978 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8_subtile) {
12979 TEST_REQUIRES_X86_SSE41;
12980 for (uint32_t m = 1; m <= 2; m++) {
12981 for (uint32_t n = 1; n <= 4; n++) {
12982 GemmMicrokernelTester()
12983 .mr(2)
12984 .nr(4)
12985 .kr(2)
12986 .sr(1)
12987 .m(m)
12988 .n(n)
12989 .k(8)
12990 .iterations(1)
12991 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
12992 }
12993 }
12994 }
12995
12996 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8_subtile_m) {
12997 TEST_REQUIRES_X86_SSE41;
12998 for (uint32_t m = 1; m <= 2; m++) {
12999 GemmMicrokernelTester()
13000 .mr(2)
13001 .nr(4)
13002 .kr(2)
13003 .sr(1)
13004 .m(m)
13005 .n(4)
13006 .k(8)
13007 .iterations(1)
13008 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13009 }
13010 }
13011
13012 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8_subtile_n) {
13013 TEST_REQUIRES_X86_SSE41;
13014 for (uint32_t n = 1; n <= 4; n++) {
13015 GemmMicrokernelTester()
13016 .mr(2)
13017 .nr(4)
13018 .kr(2)
13019 .sr(1)
13020 .m(2)
13021 .n(n)
13022 .k(8)
13023 .iterations(1)
13024 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13025 }
13026 }
13027
13028 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_lt_8) {
13029 TEST_REQUIRES_X86_SSE41;
13030 for (size_t k = 1; k < 8; k++) {
13031 GemmMicrokernelTester()
13032 .mr(2)
13033 .nr(4)
13034 .kr(2)
13035 .sr(1)
13036 .m(2)
13037 .n(4)
13038 .k(k)
13039 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13040 }
13041 }
13042
13043 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_lt_8_subtile) {
13044 TEST_REQUIRES_X86_SSE41;
13045 for (size_t k = 1; k < 8; k++) {
13046 for (uint32_t m = 1; m <= 2; m++) {
13047 for (uint32_t n = 1; n <= 4; n++) {
13048 GemmMicrokernelTester()
13049 .mr(2)
13050 .nr(4)
13051 .kr(2)
13052 .sr(1)
13053 .m(m)
13054 .n(n)
13055 .k(k)
13056 .iterations(1)
13057 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13058 }
13059 }
13060 }
13061 }
13062
13063 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_gt_8) {
13064 TEST_REQUIRES_X86_SSE41;
13065 for (size_t k = 9; k < 16; k++) {
13066 GemmMicrokernelTester()
13067 .mr(2)
13068 .nr(4)
13069 .kr(2)
13070 .sr(1)
13071 .m(2)
13072 .n(4)
13073 .k(k)
13074 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13075 }
13076 }
13077
13078 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_gt_8_subtile) {
13079 TEST_REQUIRES_X86_SSE41;
13080 for (size_t k = 9; k < 16; k++) {
13081 for (uint32_t m = 1; m <= 2; m++) {
13082 for (uint32_t n = 1; n <= 4; n++) {
13083 GemmMicrokernelTester()
13084 .mr(2)
13085 .nr(4)
13086 .kr(2)
13087 .sr(1)
13088 .m(m)
13089 .n(n)
13090 .k(k)
13091 .iterations(1)
13092 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13093 }
13094 }
13095 }
13096 }
13097
13098 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_div_8) {
13099 TEST_REQUIRES_X86_SSE41;
13100 for (size_t k = 16; k <= 80; k += 8) {
13101 GemmMicrokernelTester()
13102 .mr(2)
13103 .nr(4)
13104 .kr(2)
13105 .sr(1)
13106 .m(2)
13107 .n(4)
13108 .k(k)
13109 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13110 }
13111 }
13112
13113 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_div_8_subtile) {
13114 TEST_REQUIRES_X86_SSE41;
13115 for (size_t k = 16; k <= 80; k += 8) {
13116 for (uint32_t m = 1; m <= 2; m++) {
13117 for (uint32_t n = 1; n <= 4; n++) {
13118 GemmMicrokernelTester()
13119 .mr(2)
13120 .nr(4)
13121 .kr(2)
13122 .sr(1)
13123 .m(m)
13124 .n(n)
13125 .k(k)
13126 .iterations(1)
13127 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13128 }
13129 }
13130 }
13131 }
13132
13133 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4) {
13134 TEST_REQUIRES_X86_SSE41;
13135 for (uint32_t n = 5; n < 8; n++) {
13136 for (size_t k = 1; k <= 40; k += 9) {
13137 GemmMicrokernelTester()
13138 .mr(2)
13139 .nr(4)
13140 .kr(2)
13141 .sr(1)
13142 .m(2)
13143 .n(4)
13144 .k(k)
13145 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13146 }
13147 }
13148 }
13149
13150 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4_strided_cn) {
13151 TEST_REQUIRES_X86_SSE41;
13152 for (uint32_t n = 5; n < 8; n++) {
13153 for (size_t k = 1; k <= 40; k += 9) {
13154 GemmMicrokernelTester()
13155 .mr(2)
13156 .nr(4)
13157 .kr(2)
13158 .sr(1)
13159 .m(2)
13160 .n(4)
13161 .k(k)
13162 .cn_stride(7)
13163 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13164 }
13165 }
13166 }
13167
13168 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4_subtile) {
13169 TEST_REQUIRES_X86_SSE41;
13170 for (uint32_t n = 5; n < 8; n++) {
13171 for (size_t k = 1; k <= 40; k += 9) {
13172 for (uint32_t m = 1; m <= 2; m++) {
13173 GemmMicrokernelTester()
13174 .mr(2)
13175 .nr(4)
13176 .kr(2)
13177 .sr(1)
13178 .m(m)
13179 .n(n)
13180 .k(k)
13181 .iterations(1)
13182 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13183 }
13184 }
13185 }
13186 }
13187
13188 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4) {
13189 TEST_REQUIRES_X86_SSE41;
13190 for (uint32_t n = 8; n <= 12; n += 4) {
13191 for (size_t k = 1; k <= 40; k += 9) {
13192 GemmMicrokernelTester()
13193 .mr(2)
13194 .nr(4)
13195 .kr(2)
13196 .sr(1)
13197 .m(2)
13198 .n(4)
13199 .k(k)
13200 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13201 }
13202 }
13203 }
13204
13205 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4_strided_cn) {
13206 TEST_REQUIRES_X86_SSE41;
13207 for (uint32_t n = 8; n <= 12; n += 4) {
13208 for (size_t k = 1; k <= 40; k += 9) {
13209 GemmMicrokernelTester()
13210 .mr(2)
13211 .nr(4)
13212 .kr(2)
13213 .sr(1)
13214 .m(2)
13215 .n(n)
13216 .k(k)
13217 .cn_stride(7)
13218 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13219 }
13220 }
13221 }
13222
13223 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4_subtile) {
13224 TEST_REQUIRES_X86_SSE41;
13225 for (uint32_t n = 8; n <= 12; n += 4) {
13226 for (size_t k = 1; k <= 40; k += 9) {
13227 for (uint32_t m = 1; m <= 2; m++) {
13228 GemmMicrokernelTester()
13229 .mr(2)
13230 .nr(4)
13231 .kr(2)
13232 .sr(1)
13233 .m(m)
13234 .n(n)
13235 .k(k)
13236 .iterations(1)
13237 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13238 }
13239 }
13240 }
13241 }
13242
13243 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, small_kernel) {
13244 TEST_REQUIRES_X86_SSE41;
13245 for (size_t k = 1; k <= 40; k += 9) {
13246 GemmMicrokernelTester()
13247 .mr(2)
13248 .nr(4)
13249 .kr(2)
13250 .sr(1)
13251 .m(2)
13252 .n(4)
13253 .k(k)
13254 .ks(3)
13255 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13256 }
13257 }
13258
13259 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, small_kernel_subtile) {
13260 TEST_REQUIRES_X86_SSE41;
13261 for (size_t k = 1; k <= 40; k += 9) {
13262 for (uint32_t m = 1; m <= 2; m++) {
13263 for (uint32_t n = 1; n <= 4; n++) {
13264 GemmMicrokernelTester()
13265 .mr(2)
13266 .nr(4)
13267 .kr(2)
13268 .sr(1)
13269 .m(m)
13270 .n(n)
13271 .k(k)
13272 .ks(3)
13273 .iterations(1)
13274 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13275 }
13276 }
13277 }
13278 }
13279
13280 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4_small_kernel) {
13281 TEST_REQUIRES_X86_SSE41;
13282 for (uint32_t n = 5; n < 8; n++) {
13283 for (size_t k = 1; k <= 40; k += 9) {
13284 GemmMicrokernelTester()
13285 .mr(2)
13286 .nr(4)
13287 .kr(2)
13288 .sr(1)
13289 .m(2)
13290 .n(4)
13291 .k(k)
13292 .ks(3)
13293 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13294 }
13295 }
13296 }
13297
13298 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4_small_kernel) {
13299 TEST_REQUIRES_X86_SSE41;
13300 for (uint32_t n = 8; n <= 12; n += 4) {
13301 for (size_t k = 1; k <= 40; k += 9) {
13302 GemmMicrokernelTester()
13303 .mr(2)
13304 .nr(4)
13305 .kr(2)
13306 .sr(1)
13307 .m(2)
13308 .n(4)
13309 .k(k)
13310 .ks(3)
13311 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13312 }
13313 }
13314 }
13315
13316 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, strided_cm_subtile) {
13317 TEST_REQUIRES_X86_SSE41;
13318 for (size_t k = 1; k <= 40; k += 9) {
13319 for (uint32_t m = 1; m <= 2; m++) {
13320 for (uint32_t n = 1; n <= 4; n++) {
13321 GemmMicrokernelTester()
13322 .mr(2)
13323 .nr(4)
13324 .kr(2)
13325 .sr(1)
13326 .m(m)
13327 .n(n)
13328 .k(k)
13329 .cm_stride(7)
13330 .iterations(1)
13331 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13332 }
13333 }
13334 }
13335 }
13336
13337 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, a_offset) {
13338 TEST_REQUIRES_X86_SSE41;
13339 for (size_t k = 1; k <= 40; k += 9) {
13340 GemmMicrokernelTester()
13341 .mr(2)
13342 .nr(4)
13343 .kr(2)
13344 .sr(1)
13345 .m(2)
13346 .n(4)
13347 .k(k)
13348 .ks(3)
13349 .a_offset(83)
13350 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13351 }
13352 }
13353
13354 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, zero) {
13355 TEST_REQUIRES_X86_SSE41;
13356 for (uint32_t mz = 0; mz < 2; mz++) {
13357 for (size_t k = 1; k <= 40; k += 9) {
13358 GemmMicrokernelTester()
13359 .mr(2)
13360 .nr(4)
13361 .kr(2)
13362 .sr(1)
13363 .m(2)
13364 .n(4)
13365 .k(k)
13366 .ks(3)
13367 .a_offset(83)
13368 .zero_index(mz)
13369 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13370 }
13371 }
13372 }
13373
13374 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, qmin) {
13375 TEST_REQUIRES_X86_SSE41;
13376 GemmMicrokernelTester()
13377 .mr(2)
13378 .nr(4)
13379 .kr(2)
13380 .sr(1)
13381 .m(2)
13382 .n(4)
13383 .k(8)
13384 .qmin(128)
13385 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13386 }
13387
13388 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, qmax) {
13389 TEST_REQUIRES_X86_SSE41;
13390 GemmMicrokernelTester()
13391 .mr(2)
13392 .nr(4)
13393 .kr(2)
13394 .sr(1)
13395 .m(2)
13396 .n(4)
13397 .k(8)
13398 .qmax(128)
13399 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13400 }
13401
13402 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, strided_cm) {
13403 TEST_REQUIRES_X86_SSE41;
13404 GemmMicrokernelTester()
13405 .mr(2)
13406 .nr(4)
13407 .kr(2)
13408 .sr(1)
13409 .m(2)
13410 .n(4)
13411 .k(8)
13412 .cm_stride(7)
13413 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13414 }
13415
13416 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, no_a_zero_point) {
13417 TEST_REQUIRES_X86_SSE41;
13418 for (size_t k = 1; k <= 40; k += 9) {
13419 GemmMicrokernelTester()
13420 .mr(2)
13421 .nr(4)
13422 .kr(2)
13423 .sr(1)
13424 .m(2)
13425 .n(4)
13426 .k(k)
13427 .a_zero_point(0)
13428 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13429 }
13430 }
13431
13432 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, no_b_zero_point) {
13433 TEST_REQUIRES_X86_SSE41;
13434 for (size_t k = 1; k <= 40; k += 9) {
13435 GemmMicrokernelTester()
13436 .mr(2)
13437 .nr(4)
13438 .kr(2)
13439 .sr(1)
13440 .m(2)
13441 .n(4)
13442 .k(k)
13443 .b_zero_point(0)
13444 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13445 }
13446 }
13447
13448 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, no_zero_point) {
13449 TEST_REQUIRES_X86_SSE41;
13450 for (size_t k = 1; k <= 40; k += 9) {
13451 GemmMicrokernelTester()
13452 .mr(2)
13453 .nr(4)
13454 .kr(2)
13455 .sr(1)
13456 .m(2)
13457 .n(4)
13458 .k(k)
13459 .a_zero_point(0)
13460 .b_zero_point(0)
13461 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13462 }
13463 }
13464#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13465
13466
13467#if XNN_ARCH_X86 || XNN_ARCH_X86_64
13468 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8) {
13469 TEST_REQUIRES_X86_SSE41;
13470 GemmMicrokernelTester()
13471 .mr(3)
13472 .nr(4)
13473 .kr(2)
13474 .sr(1)
13475 .m(3)
13476 .n(4)
13477 .k(8)
13478 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13479 }
13480
13481 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cn) {
13482 TEST_REQUIRES_X86_SSE41;
13483 GemmMicrokernelTester()
13484 .mr(3)
13485 .nr(4)
13486 .kr(2)
13487 .sr(1)
13488 .m(3)
13489 .n(4)
13490 .k(8)
13491 .cn_stride(7)
13492 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13493 }
13494
13495 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile) {
13496 TEST_REQUIRES_X86_SSE41;
13497 for (uint32_t m = 1; m <= 3; m++) {
13498 for (uint32_t n = 1; n <= 4; n++) {
13499 GemmMicrokernelTester()
13500 .mr(3)
13501 .nr(4)
13502 .kr(2)
13503 .sr(1)
13504 .m(m)
13505 .n(n)
13506 .k(8)
13507 .iterations(1)
13508 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13509 }
13510 }
13511 }
13512
13513 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile_m) {
13514 TEST_REQUIRES_X86_SSE41;
13515 for (uint32_t m = 1; m <= 3; m++) {
13516 GemmMicrokernelTester()
13517 .mr(3)
13518 .nr(4)
13519 .kr(2)
13520 .sr(1)
13521 .m(m)
13522 .n(4)
13523 .k(8)
13524 .iterations(1)
13525 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13526 }
13527 }
13528
13529 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile_n) {
13530 TEST_REQUIRES_X86_SSE41;
13531 for (uint32_t n = 1; n <= 4; n++) {
13532 GemmMicrokernelTester()
13533 .mr(3)
13534 .nr(4)
13535 .kr(2)
13536 .sr(1)
13537 .m(3)
13538 .n(n)
13539 .k(8)
13540 .iterations(1)
13541 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13542 }
13543 }
13544
13545 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_lt_8) {
13546 TEST_REQUIRES_X86_SSE41;
13547 for (size_t k = 1; k < 8; k++) {
13548 GemmMicrokernelTester()
13549 .mr(3)
13550 .nr(4)
13551 .kr(2)
13552 .sr(1)
13553 .m(3)
13554 .n(4)
13555 .k(k)
13556 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13557 }
13558 }
13559
13560 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_lt_8_subtile) {
13561 TEST_REQUIRES_X86_SSE41;
13562 for (size_t k = 1; k < 8; k++) {
13563 for (uint32_t m = 1; m <= 3; m++) {
13564 for (uint32_t n = 1; n <= 4; n++) {
13565 GemmMicrokernelTester()
13566 .mr(3)
13567 .nr(4)
13568 .kr(2)
13569 .sr(1)
13570 .m(m)
13571 .n(n)
13572 .k(k)
13573 .iterations(1)
13574 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13575 }
13576 }
13577 }
13578 }
13579
13580 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_gt_8) {
13581 TEST_REQUIRES_X86_SSE41;
13582 for (size_t k = 9; k < 16; k++) {
13583 GemmMicrokernelTester()
13584 .mr(3)
13585 .nr(4)
13586 .kr(2)
13587 .sr(1)
13588 .m(3)
13589 .n(4)
13590 .k(k)
13591 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13592 }
13593 }
13594
13595 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_gt_8_subtile) {
13596 TEST_REQUIRES_X86_SSE41;
13597 for (size_t k = 9; k < 16; k++) {
13598 for (uint32_t m = 1; m <= 3; m++) {
13599 for (uint32_t n = 1; n <= 4; n++) {
13600 GemmMicrokernelTester()
13601 .mr(3)
13602 .nr(4)
13603 .kr(2)
13604 .sr(1)
13605 .m(m)
13606 .n(n)
13607 .k(k)
13608 .iterations(1)
13609 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13610 }
13611 }
13612 }
13613 }
13614
13615 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_div_8) {
13616 TEST_REQUIRES_X86_SSE41;
13617 for (size_t k = 16; k <= 80; k += 8) {
13618 GemmMicrokernelTester()
13619 .mr(3)
13620 .nr(4)
13621 .kr(2)
13622 .sr(1)
13623 .m(3)
13624 .n(4)
13625 .k(k)
13626 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13627 }
13628 }
13629
13630 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_div_8_subtile) {
13631 TEST_REQUIRES_X86_SSE41;
13632 for (size_t k = 16; k <= 80; k += 8) {
13633 for (uint32_t m = 1; m <= 3; m++) {
13634 for (uint32_t n = 1; n <= 4; n++) {
13635 GemmMicrokernelTester()
13636 .mr(3)
13637 .nr(4)
13638 .kr(2)
13639 .sr(1)
13640 .m(m)
13641 .n(n)
13642 .k(k)
13643 .iterations(1)
13644 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13645 }
13646 }
13647 }
13648 }
13649
13650 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4) {
13651 TEST_REQUIRES_X86_SSE41;
13652 for (uint32_t n = 5; n < 8; n++) {
13653 for (size_t k = 1; k <= 40; k += 9) {
13654 GemmMicrokernelTester()
13655 .mr(3)
13656 .nr(4)
13657 .kr(2)
13658 .sr(1)
13659 .m(3)
13660 .n(4)
13661 .k(k)
13662 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13663 }
13664 }
13665 }
13666
13667 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_strided_cn) {
13668 TEST_REQUIRES_X86_SSE41;
13669 for (uint32_t n = 5; n < 8; n++) {
13670 for (size_t k = 1; k <= 40; k += 9) {
13671 GemmMicrokernelTester()
13672 .mr(3)
13673 .nr(4)
13674 .kr(2)
13675 .sr(1)
13676 .m(3)
13677 .n(4)
13678 .k(k)
13679 .cn_stride(7)
13680 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13681 }
13682 }
13683 }
13684
13685 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_subtile) {
13686 TEST_REQUIRES_X86_SSE41;
13687 for (uint32_t n = 5; n < 8; n++) {
13688 for (size_t k = 1; k <= 40; k += 9) {
13689 for (uint32_t m = 1; m <= 3; m++) {
13690 GemmMicrokernelTester()
13691 .mr(3)
13692 .nr(4)
13693 .kr(2)
13694 .sr(1)
13695 .m(m)
13696 .n(n)
13697 .k(k)
13698 .iterations(1)
13699 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13700 }
13701 }
13702 }
13703 }
13704
13705 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4) {
13706 TEST_REQUIRES_X86_SSE41;
13707 for (uint32_t n = 8; n <= 12; n += 4) {
13708 for (size_t k = 1; k <= 40; k += 9) {
13709 GemmMicrokernelTester()
13710 .mr(3)
13711 .nr(4)
13712 .kr(2)
13713 .sr(1)
13714 .m(3)
13715 .n(4)
13716 .k(k)
13717 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13718 }
13719 }
13720 }
13721
13722 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_strided_cn) {
13723 TEST_REQUIRES_X86_SSE41;
13724 for (uint32_t n = 8; n <= 12; n += 4) {
13725 for (size_t k = 1; k <= 40; k += 9) {
13726 GemmMicrokernelTester()
13727 .mr(3)
13728 .nr(4)
13729 .kr(2)
13730 .sr(1)
13731 .m(3)
13732 .n(n)
13733 .k(k)
13734 .cn_stride(7)
13735 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13736 }
13737 }
13738 }
13739
13740 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_subtile) {
13741 TEST_REQUIRES_X86_SSE41;
13742 for (uint32_t n = 8; n <= 12; n += 4) {
13743 for (size_t k = 1; k <= 40; k += 9) {
13744 for (uint32_t m = 1; m <= 3; m++) {
13745 GemmMicrokernelTester()
13746 .mr(3)
13747 .nr(4)
13748 .kr(2)
13749 .sr(1)
13750 .m(m)
13751 .n(n)
13752 .k(k)
13753 .iterations(1)
13754 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13755 }
13756 }
13757 }
13758 }
13759
13760 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, small_kernel) {
13761 TEST_REQUIRES_X86_SSE41;
13762 for (size_t k = 1; k <= 40; k += 9) {
13763 GemmMicrokernelTester()
13764 .mr(3)
13765 .nr(4)
13766 .kr(2)
13767 .sr(1)
13768 .m(3)
13769 .n(4)
13770 .k(k)
13771 .ks(3)
13772 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13773 }
13774 }
13775
13776 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, small_kernel_subtile) {
13777 TEST_REQUIRES_X86_SSE41;
13778 for (size_t k = 1; k <= 40; k += 9) {
13779 for (uint32_t m = 1; m <= 3; m++) {
13780 for (uint32_t n = 1; n <= 4; n++) {
13781 GemmMicrokernelTester()
13782 .mr(3)
13783 .nr(4)
13784 .kr(2)
13785 .sr(1)
13786 .m(m)
13787 .n(n)
13788 .k(k)
13789 .ks(3)
13790 .iterations(1)
13791 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13792 }
13793 }
13794 }
13795 }
13796
13797 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_small_kernel) {
13798 TEST_REQUIRES_X86_SSE41;
13799 for (uint32_t n = 5; n < 8; n++) {
13800 for (size_t k = 1; k <= 40; k += 9) {
13801 GemmMicrokernelTester()
13802 .mr(3)
13803 .nr(4)
13804 .kr(2)
13805 .sr(1)
13806 .m(3)
13807 .n(4)
13808 .k(k)
13809 .ks(3)
13810 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13811 }
13812 }
13813 }
13814
13815 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_small_kernel) {
13816 TEST_REQUIRES_X86_SSE41;
13817 for (uint32_t n = 8; n <= 12; n += 4) {
13818 for (size_t k = 1; k <= 40; k += 9) {
13819 GemmMicrokernelTester()
13820 .mr(3)
13821 .nr(4)
13822 .kr(2)
13823 .sr(1)
13824 .m(3)
13825 .n(4)
13826 .k(k)
13827 .ks(3)
13828 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13829 }
13830 }
13831 }
13832
13833 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cm_subtile) {
13834 TEST_REQUIRES_X86_SSE41;
13835 for (size_t k = 1; k <= 40; k += 9) {
13836 for (uint32_t m = 1; m <= 3; m++) {
13837 for (uint32_t n = 1; n <= 4; n++) {
13838 GemmMicrokernelTester()
13839 .mr(3)
13840 .nr(4)
13841 .kr(2)
13842 .sr(1)
13843 .m(m)
13844 .n(n)
13845 .k(k)
13846 .cm_stride(7)
13847 .iterations(1)
13848 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13849 }
13850 }
13851 }
13852 }
13853
13854 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, a_offset) {
13855 TEST_REQUIRES_X86_SSE41;
13856 for (size_t k = 1; k <= 40; k += 9) {
13857 GemmMicrokernelTester()
13858 .mr(3)
13859 .nr(4)
13860 .kr(2)
13861 .sr(1)
13862 .m(3)
13863 .n(4)
13864 .k(k)
13865 .ks(3)
13866 .a_offset(127)
13867 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13868 }
13869 }
13870
13871 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, zero) {
13872 TEST_REQUIRES_X86_SSE41;
13873 for (uint32_t mz = 0; mz < 3; mz++) {
13874 for (size_t k = 1; k <= 40; k += 9) {
13875 GemmMicrokernelTester()
13876 .mr(3)
13877 .nr(4)
13878 .kr(2)
13879 .sr(1)
13880 .m(3)
13881 .n(4)
13882 .k(k)
13883 .ks(3)
13884 .a_offset(127)
13885 .zero_index(mz)
13886 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13887 }
13888 }
13889 }
13890
13891 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, qmin) {
13892 TEST_REQUIRES_X86_SSE41;
13893 GemmMicrokernelTester()
13894 .mr(3)
13895 .nr(4)
13896 .kr(2)
13897 .sr(1)
13898 .m(3)
13899 .n(4)
13900 .k(8)
13901 .qmin(128)
13902 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13903 }
13904
13905 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, qmax) {
13906 TEST_REQUIRES_X86_SSE41;
13907 GemmMicrokernelTester()
13908 .mr(3)
13909 .nr(4)
13910 .kr(2)
13911 .sr(1)
13912 .m(3)
13913 .n(4)
13914 .k(8)
13915 .qmax(128)
13916 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13917 }
13918
13919 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cm) {
13920 TEST_REQUIRES_X86_SSE41;
13921 GemmMicrokernelTester()
13922 .mr(3)
13923 .nr(4)
13924 .kr(2)
13925 .sr(1)
13926 .m(3)
13927 .n(4)
13928 .k(8)
13929 .cm_stride(7)
13930 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13931 }
13932
13933 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, no_a_zero_point) {
13934 TEST_REQUIRES_X86_SSE41;
13935 for (size_t k = 1; k <= 40; k += 9) {
13936 GemmMicrokernelTester()
13937 .mr(3)
13938 .nr(4)
13939 .kr(2)
13940 .sr(1)
13941 .m(3)
13942 .n(4)
13943 .k(k)
13944 .a_zero_point(0)
13945 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13946 }
13947 }
13948
13949 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, no_b_zero_point) {
13950 TEST_REQUIRES_X86_SSE41;
13951 for (size_t k = 1; k <= 40; k += 9) {
13952 GemmMicrokernelTester()
13953 .mr(3)
13954 .nr(4)
13955 .kr(2)
13956 .sr(1)
13957 .m(3)
13958 .n(4)
13959 .k(k)
13960 .b_zero_point(0)
13961 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13962 }
13963 }
13964
13965 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, no_zero_point) {
13966 TEST_REQUIRES_X86_SSE41;
13967 for (size_t k = 1; k <= 40; k += 9) {
13968 GemmMicrokernelTester()
13969 .mr(3)
13970 .nr(4)
13971 .kr(2)
13972 .sr(1)
13973 .m(3)
13974 .n(4)
13975 .k(k)
13976 .a_zero_point(0)
13977 .b_zero_point(0)
13978 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13979 }
13980 }
13981#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13982
13983
13984#if XNN_ARCH_X86 || XNN_ARCH_X86_64
13985 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8) {
13986 TEST_REQUIRES_X86_SSE41;
13987 GemmMicrokernelTester()
13988 .mr(4)
13989 .nr(4)
13990 .kr(2)
13991 .sr(1)
13992 .m(4)
13993 .n(4)
13994 .k(8)
13995 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
13996 }
13997
13998 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cn) {
13999 TEST_REQUIRES_X86_SSE41;
14000 GemmMicrokernelTester()
14001 .mr(4)
14002 .nr(4)
14003 .kr(2)
14004 .sr(1)
14005 .m(4)
14006 .n(4)
14007 .k(8)
14008 .cn_stride(7)
14009 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14010 }
14011
14012 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile) {
14013 TEST_REQUIRES_X86_SSE41;
14014 for (uint32_t m = 1; m <= 4; m++) {
14015 for (uint32_t n = 1; n <= 4; n++) {
14016 GemmMicrokernelTester()
14017 .mr(4)
14018 .nr(4)
14019 .kr(2)
14020 .sr(1)
14021 .m(m)
14022 .n(n)
14023 .k(8)
14024 .iterations(1)
14025 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14026 }
14027 }
14028 }
14029
14030 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile_m) {
14031 TEST_REQUIRES_X86_SSE41;
14032 for (uint32_t m = 1; m <= 4; m++) {
14033 GemmMicrokernelTester()
14034 .mr(4)
14035 .nr(4)
14036 .kr(2)
14037 .sr(1)
14038 .m(m)
14039 .n(4)
14040 .k(8)
14041 .iterations(1)
14042 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14043 }
14044 }
14045
14046 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile_n) {
14047 TEST_REQUIRES_X86_SSE41;
14048 for (uint32_t n = 1; n <= 4; n++) {
14049 GemmMicrokernelTester()
14050 .mr(4)
14051 .nr(4)
14052 .kr(2)
14053 .sr(1)
14054 .m(4)
14055 .n(n)
14056 .k(8)
14057 .iterations(1)
14058 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14059 }
14060 }
14061
14062 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_lt_8) {
14063 TEST_REQUIRES_X86_SSE41;
14064 for (size_t k = 1; k < 8; k++) {
14065 GemmMicrokernelTester()
14066 .mr(4)
14067 .nr(4)
14068 .kr(2)
14069 .sr(1)
14070 .m(4)
14071 .n(4)
14072 .k(k)
14073 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14074 }
14075 }
14076
14077 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_lt_8_subtile) {
14078 TEST_REQUIRES_X86_SSE41;
14079 for (size_t k = 1; k < 8; k++) {
14080 for (uint32_t m = 1; m <= 4; m++) {
14081 for (uint32_t n = 1; n <= 4; n++) {
14082 GemmMicrokernelTester()
14083 .mr(4)
14084 .nr(4)
14085 .kr(2)
14086 .sr(1)
14087 .m(m)
14088 .n(n)
14089 .k(k)
14090 .iterations(1)
14091 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14092 }
14093 }
14094 }
14095 }
14096
14097 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_gt_8) {
14098 TEST_REQUIRES_X86_SSE41;
14099 for (size_t k = 9; k < 16; k++) {
14100 GemmMicrokernelTester()
14101 .mr(4)
14102 .nr(4)
14103 .kr(2)
14104 .sr(1)
14105 .m(4)
14106 .n(4)
14107 .k(k)
14108 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14109 }
14110 }
14111
14112 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_gt_8_subtile) {
14113 TEST_REQUIRES_X86_SSE41;
14114 for (size_t k = 9; k < 16; k++) {
14115 for (uint32_t m = 1; m <= 4; m++) {
14116 for (uint32_t n = 1; n <= 4; n++) {
14117 GemmMicrokernelTester()
14118 .mr(4)
14119 .nr(4)
14120 .kr(2)
14121 .sr(1)
14122 .m(m)
14123 .n(n)
14124 .k(k)
14125 .iterations(1)
14126 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14127 }
14128 }
14129 }
14130 }
14131
14132 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_div_8) {
14133 TEST_REQUIRES_X86_SSE41;
14134 for (size_t k = 16; k <= 80; k += 8) {
14135 GemmMicrokernelTester()
14136 .mr(4)
14137 .nr(4)
14138 .kr(2)
14139 .sr(1)
14140 .m(4)
14141 .n(4)
14142 .k(k)
14143 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14144 }
14145 }
14146
14147 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_div_8_subtile) {
14148 TEST_REQUIRES_X86_SSE41;
14149 for (size_t k = 16; k <= 80; k += 8) {
14150 for (uint32_t m = 1; m <= 4; m++) {
14151 for (uint32_t n = 1; n <= 4; n++) {
14152 GemmMicrokernelTester()
14153 .mr(4)
14154 .nr(4)
14155 .kr(2)
14156 .sr(1)
14157 .m(m)
14158 .n(n)
14159 .k(k)
14160 .iterations(1)
14161 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14162 }
14163 }
14164 }
14165 }
14166
14167 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4) {
14168 TEST_REQUIRES_X86_SSE41;
14169 for (uint32_t n = 5; n < 8; n++) {
14170 for (size_t k = 1; k <= 40; k += 9) {
14171 GemmMicrokernelTester()
14172 .mr(4)
14173 .nr(4)
14174 .kr(2)
14175 .sr(1)
14176 .m(4)
14177 .n(4)
14178 .k(k)
14179 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14180 }
14181 }
14182 }
14183
14184 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_strided_cn) {
14185 TEST_REQUIRES_X86_SSE41;
14186 for (uint32_t n = 5; n < 8; n++) {
14187 for (size_t k = 1; k <= 40; k += 9) {
14188 GemmMicrokernelTester()
14189 .mr(4)
14190 .nr(4)
14191 .kr(2)
14192 .sr(1)
14193 .m(4)
14194 .n(4)
14195 .k(k)
14196 .cn_stride(7)
14197 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14198 }
14199 }
14200 }
14201
14202 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_subtile) {
14203 TEST_REQUIRES_X86_SSE41;
14204 for (uint32_t n = 5; n < 8; n++) {
14205 for (size_t k = 1; k <= 40; k += 9) {
14206 for (uint32_t m = 1; m <= 4; m++) {
14207 GemmMicrokernelTester()
14208 .mr(4)
14209 .nr(4)
14210 .kr(2)
14211 .sr(1)
14212 .m(m)
14213 .n(n)
14214 .k(k)
14215 .iterations(1)
14216 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14217 }
14218 }
14219 }
14220 }
14221
14222 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4) {
14223 TEST_REQUIRES_X86_SSE41;
14224 for (uint32_t n = 8; n <= 12; n += 4) {
14225 for (size_t k = 1; k <= 40; k += 9) {
14226 GemmMicrokernelTester()
14227 .mr(4)
14228 .nr(4)
14229 .kr(2)
14230 .sr(1)
14231 .m(4)
14232 .n(4)
14233 .k(k)
14234 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14235 }
14236 }
14237 }
14238
14239 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_strided_cn) {
14240 TEST_REQUIRES_X86_SSE41;
14241 for (uint32_t n = 8; n <= 12; n += 4) {
14242 for (size_t k = 1; k <= 40; k += 9) {
14243 GemmMicrokernelTester()
14244 .mr(4)
14245 .nr(4)
14246 .kr(2)
14247 .sr(1)
14248 .m(4)
14249 .n(n)
14250 .k(k)
14251 .cn_stride(7)
14252 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14253 }
14254 }
14255 }
14256
14257 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_subtile) {
14258 TEST_REQUIRES_X86_SSE41;
14259 for (uint32_t n = 8; n <= 12; n += 4) {
14260 for (size_t k = 1; k <= 40; k += 9) {
14261 for (uint32_t m = 1; m <= 4; m++) {
14262 GemmMicrokernelTester()
14263 .mr(4)
14264 .nr(4)
14265 .kr(2)
14266 .sr(1)
14267 .m(m)
14268 .n(n)
14269 .k(k)
14270 .iterations(1)
14271 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14272 }
14273 }
14274 }
14275 }
14276
14277 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, small_kernel) {
14278 TEST_REQUIRES_X86_SSE41;
14279 for (size_t k = 1; k <= 40; k += 9) {
14280 GemmMicrokernelTester()
14281 .mr(4)
14282 .nr(4)
14283 .kr(2)
14284 .sr(1)
14285 .m(4)
14286 .n(4)
14287 .k(k)
14288 .ks(3)
14289 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14290 }
14291 }
14292
14293 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, small_kernel_subtile) {
14294 TEST_REQUIRES_X86_SSE41;
14295 for (size_t k = 1; k <= 40; k += 9) {
14296 for (uint32_t m = 1; m <= 4; m++) {
14297 for (uint32_t n = 1; n <= 4; n++) {
14298 GemmMicrokernelTester()
14299 .mr(4)
14300 .nr(4)
14301 .kr(2)
14302 .sr(1)
14303 .m(m)
14304 .n(n)
14305 .k(k)
14306 .ks(3)
14307 .iterations(1)
14308 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14309 }
14310 }
14311 }
14312 }
14313
14314 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_small_kernel) {
14315 TEST_REQUIRES_X86_SSE41;
14316 for (uint32_t n = 5; n < 8; n++) {
14317 for (size_t k = 1; k <= 40; k += 9) {
14318 GemmMicrokernelTester()
14319 .mr(4)
14320 .nr(4)
14321 .kr(2)
14322 .sr(1)
14323 .m(4)
14324 .n(4)
14325 .k(k)
14326 .ks(3)
14327 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14328 }
14329 }
14330 }
14331
14332 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_small_kernel) {
14333 TEST_REQUIRES_X86_SSE41;
14334 for (uint32_t n = 8; n <= 12; n += 4) {
14335 for (size_t k = 1; k <= 40; k += 9) {
14336 GemmMicrokernelTester()
14337 .mr(4)
14338 .nr(4)
14339 .kr(2)
14340 .sr(1)
14341 .m(4)
14342 .n(4)
14343 .k(k)
14344 .ks(3)
14345 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14346 }
14347 }
14348 }
14349
14350 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cm_subtile) {
14351 TEST_REQUIRES_X86_SSE41;
14352 for (size_t k = 1; k <= 40; k += 9) {
14353 for (uint32_t m = 1; m <= 4; m++) {
14354 for (uint32_t n = 1; n <= 4; n++) {
14355 GemmMicrokernelTester()
14356 .mr(4)
14357 .nr(4)
14358 .kr(2)
14359 .sr(1)
14360 .m(m)
14361 .n(n)
14362 .k(k)
14363 .cm_stride(7)
14364 .iterations(1)
14365 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14366 }
14367 }
14368 }
14369 }
14370
14371 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, a_offset) {
14372 TEST_REQUIRES_X86_SSE41;
14373 for (size_t k = 1; k <= 40; k += 9) {
14374 GemmMicrokernelTester()
14375 .mr(4)
14376 .nr(4)
14377 .kr(2)
14378 .sr(1)
14379 .m(4)
14380 .n(4)
14381 .k(k)
14382 .ks(3)
14383 .a_offset(163)
14384 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14385 }
14386 }
14387
14388 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, zero) {
14389 TEST_REQUIRES_X86_SSE41;
14390 for (uint32_t mz = 0; mz < 4; mz++) {
14391 for (size_t k = 1; k <= 40; k += 9) {
14392 GemmMicrokernelTester()
14393 .mr(4)
14394 .nr(4)
14395 .kr(2)
14396 .sr(1)
14397 .m(4)
14398 .n(4)
14399 .k(k)
14400 .ks(3)
14401 .a_offset(163)
14402 .zero_index(mz)
14403 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14404 }
14405 }
14406 }
14407
14408 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, qmin) {
14409 TEST_REQUIRES_X86_SSE41;
14410 GemmMicrokernelTester()
14411 .mr(4)
14412 .nr(4)
14413 .kr(2)
14414 .sr(1)
14415 .m(4)
14416 .n(4)
14417 .k(8)
14418 .qmin(128)
14419 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14420 }
14421
14422 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, qmax) {
14423 TEST_REQUIRES_X86_SSE41;
14424 GemmMicrokernelTester()
14425 .mr(4)
14426 .nr(4)
14427 .kr(2)
14428 .sr(1)
14429 .m(4)
14430 .n(4)
14431 .k(8)
14432 .qmax(128)
14433 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14434 }
14435
14436 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cm) {
14437 TEST_REQUIRES_X86_SSE41;
14438 GemmMicrokernelTester()
14439 .mr(4)
14440 .nr(4)
14441 .kr(2)
14442 .sr(1)
14443 .m(4)
14444 .n(4)
14445 .k(8)
14446 .cm_stride(7)
14447 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14448 }
14449
14450 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, no_a_zero_point) {
14451 TEST_REQUIRES_X86_SSE41;
14452 for (size_t k = 1; k <= 40; k += 9) {
14453 GemmMicrokernelTester()
14454 .mr(4)
14455 .nr(4)
14456 .kr(2)
14457 .sr(1)
14458 .m(4)
14459 .n(4)
14460 .k(k)
14461 .a_zero_point(0)
14462 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14463 }
14464 }
14465
14466 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, no_b_zero_point) {
14467 TEST_REQUIRES_X86_SSE41;
14468 for (size_t k = 1; k <= 40; k += 9) {
14469 GemmMicrokernelTester()
14470 .mr(4)
14471 .nr(4)
14472 .kr(2)
14473 .sr(1)
14474 .m(4)
14475 .n(4)
14476 .k(k)
14477 .b_zero_point(0)
14478 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14479 }
14480 }
14481
14482 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, no_zero_point) {
14483 TEST_REQUIRES_X86_SSE41;
14484 for (size_t k = 1; k <= 40; k += 9) {
14485 GemmMicrokernelTester()
14486 .mr(4)
14487 .nr(4)
14488 .kr(2)
14489 .sr(1)
14490 .m(4)
14491 .n(4)
14492 .k(k)
14493 .a_zero_point(0)
14494 .b_zero_point(0)
14495 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14496 }
14497 }
14498#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14499
14500
14501#if XNN_ARCH_X86 || XNN_ARCH_X86_64
14502 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8) {
14503 TEST_REQUIRES_X86_AVX;
14504 GemmMicrokernelTester()
14505 .mr(1)
14506 .nr(4)
14507 .kr(2)
14508 .sr(1)
14509 .m(1)
14510 .n(4)
14511 .k(8)
14512 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14513 }
14514
14515 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, strided_cn) {
14516 TEST_REQUIRES_X86_AVX;
14517 GemmMicrokernelTester()
14518 .mr(1)
14519 .nr(4)
14520 .kr(2)
14521 .sr(1)
14522 .m(1)
14523 .n(4)
14524 .k(8)
14525 .cn_stride(7)
14526 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14527 }
14528
14529 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8_subtile) {
14530 TEST_REQUIRES_X86_AVX;
14531 for (uint32_t m = 1; m <= 1; m++) {
14532 for (uint32_t n = 1; n <= 4; n++) {
14533 GemmMicrokernelTester()
14534 .mr(1)
14535 .nr(4)
14536 .kr(2)
14537 .sr(1)
14538 .m(m)
14539 .n(n)
14540 .k(8)
14541 .iterations(1)
14542 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14543 }
14544 }
14545 }
14546
14547 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8_subtile_m) {
14548 TEST_REQUIRES_X86_AVX;
14549 for (uint32_t m = 1; m <= 1; m++) {
14550 GemmMicrokernelTester()
14551 .mr(1)
14552 .nr(4)
14553 .kr(2)
14554 .sr(1)
14555 .m(m)
14556 .n(4)
14557 .k(8)
14558 .iterations(1)
14559 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14560 }
14561 }
14562
14563 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8_subtile_n) {
14564 TEST_REQUIRES_X86_AVX;
14565 for (uint32_t n = 1; n <= 4; n++) {
14566 GemmMicrokernelTester()
14567 .mr(1)
14568 .nr(4)
14569 .kr(2)
14570 .sr(1)
14571 .m(1)
14572 .n(n)
14573 .k(8)
14574 .iterations(1)
14575 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14576 }
14577 }
14578
14579 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_lt_8) {
14580 TEST_REQUIRES_X86_AVX;
14581 for (size_t k = 1; k < 8; k++) {
14582 GemmMicrokernelTester()
14583 .mr(1)
14584 .nr(4)
14585 .kr(2)
14586 .sr(1)
14587 .m(1)
14588 .n(4)
14589 .k(k)
14590 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14591 }
14592 }
14593
14594 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_lt_8_subtile) {
14595 TEST_REQUIRES_X86_AVX;
14596 for (size_t k = 1; k < 8; k++) {
14597 for (uint32_t m = 1; m <= 1; m++) {
14598 for (uint32_t n = 1; n <= 4; n++) {
14599 GemmMicrokernelTester()
14600 .mr(1)
14601 .nr(4)
14602 .kr(2)
14603 .sr(1)
14604 .m(m)
14605 .n(n)
14606 .k(k)
14607 .iterations(1)
14608 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14609 }
14610 }
14611 }
14612 }
14613
14614 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_gt_8) {
14615 TEST_REQUIRES_X86_AVX;
14616 for (size_t k = 9; k < 16; k++) {
14617 GemmMicrokernelTester()
14618 .mr(1)
14619 .nr(4)
14620 .kr(2)
14621 .sr(1)
14622 .m(1)
14623 .n(4)
14624 .k(k)
14625 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14626 }
14627 }
14628
14629 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_gt_8_subtile) {
14630 TEST_REQUIRES_X86_AVX;
14631 for (size_t k = 9; k < 16; k++) {
14632 for (uint32_t m = 1; m <= 1; m++) {
14633 for (uint32_t n = 1; n <= 4; n++) {
14634 GemmMicrokernelTester()
14635 .mr(1)
14636 .nr(4)
14637 .kr(2)
14638 .sr(1)
14639 .m(m)
14640 .n(n)
14641 .k(k)
14642 .iterations(1)
14643 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14644 }
14645 }
14646 }
14647 }
14648
14649 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_div_8) {
14650 TEST_REQUIRES_X86_AVX;
14651 for (size_t k = 16; k <= 80; k += 8) {
14652 GemmMicrokernelTester()
14653 .mr(1)
14654 .nr(4)
14655 .kr(2)
14656 .sr(1)
14657 .m(1)
14658 .n(4)
14659 .k(k)
14660 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14661 }
14662 }
14663
14664 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_div_8_subtile) {
14665 TEST_REQUIRES_X86_AVX;
14666 for (size_t k = 16; k <= 80; k += 8) {
14667 for (uint32_t m = 1; m <= 1; m++) {
14668 for (uint32_t n = 1; n <= 4; n++) {
14669 GemmMicrokernelTester()
14670 .mr(1)
14671 .nr(4)
14672 .kr(2)
14673 .sr(1)
14674 .m(m)
14675 .n(n)
14676 .k(k)
14677 .iterations(1)
14678 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14679 }
14680 }
14681 }
14682 }
14683
14684 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4) {
14685 TEST_REQUIRES_X86_AVX;
14686 for (uint32_t n = 5; n < 8; n++) {
14687 for (size_t k = 1; k <= 40; k += 9) {
14688 GemmMicrokernelTester()
14689 .mr(1)
14690 .nr(4)
14691 .kr(2)
14692 .sr(1)
14693 .m(1)
14694 .n(4)
14695 .k(k)
14696 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14697 }
14698 }
14699 }
14700
14701 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4_strided_cn) {
14702 TEST_REQUIRES_X86_AVX;
14703 for (uint32_t n = 5; n < 8; n++) {
14704 for (size_t k = 1; k <= 40; k += 9) {
14705 GemmMicrokernelTester()
14706 .mr(1)
14707 .nr(4)
14708 .kr(2)
14709 .sr(1)
14710 .m(1)
14711 .n(4)
14712 .k(k)
14713 .cn_stride(7)
14714 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14715 }
14716 }
14717 }
14718
14719 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4_subtile) {
14720 TEST_REQUIRES_X86_AVX;
14721 for (uint32_t n = 5; n < 8; n++) {
14722 for (size_t k = 1; k <= 40; k += 9) {
14723 for (uint32_t m = 1; m <= 1; m++) {
14724 GemmMicrokernelTester()
14725 .mr(1)
14726 .nr(4)
14727 .kr(2)
14728 .sr(1)
14729 .m(m)
14730 .n(n)
14731 .k(k)
14732 .iterations(1)
14733 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14734 }
14735 }
14736 }
14737 }
14738
14739 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4) {
14740 TEST_REQUIRES_X86_AVX;
14741 for (uint32_t n = 8; n <= 12; n += 4) {
14742 for (size_t k = 1; k <= 40; k += 9) {
14743 GemmMicrokernelTester()
14744 .mr(1)
14745 .nr(4)
14746 .kr(2)
14747 .sr(1)
14748 .m(1)
14749 .n(4)
14750 .k(k)
14751 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14752 }
14753 }
14754 }
14755
14756 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4_strided_cn) {
14757 TEST_REQUIRES_X86_AVX;
14758 for (uint32_t n = 8; n <= 12; n += 4) {
14759 for (size_t k = 1; k <= 40; k += 9) {
14760 GemmMicrokernelTester()
14761 .mr(1)
14762 .nr(4)
14763 .kr(2)
14764 .sr(1)
14765 .m(1)
14766 .n(n)
14767 .k(k)
14768 .cn_stride(7)
14769 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14770 }
14771 }
14772 }
14773
14774 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4_subtile) {
14775 TEST_REQUIRES_X86_AVX;
14776 for (uint32_t n = 8; n <= 12; n += 4) {
14777 for (size_t k = 1; k <= 40; k += 9) {
14778 for (uint32_t m = 1; m <= 1; m++) {
14779 GemmMicrokernelTester()
14780 .mr(1)
14781 .nr(4)
14782 .kr(2)
14783 .sr(1)
14784 .m(m)
14785 .n(n)
14786 .k(k)
14787 .iterations(1)
14788 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14789 }
14790 }
14791 }
14792 }
14793
14794 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, small_kernel) {
14795 TEST_REQUIRES_X86_AVX;
14796 for (size_t k = 1; k <= 40; k += 9) {
14797 GemmMicrokernelTester()
14798 .mr(1)
14799 .nr(4)
14800 .kr(2)
14801 .sr(1)
14802 .m(1)
14803 .n(4)
14804 .k(k)
14805 .ks(3)
14806 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14807 }
14808 }
14809
14810 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, small_kernel_subtile) {
14811 TEST_REQUIRES_X86_AVX;
14812 for (size_t k = 1; k <= 40; k += 9) {
14813 for (uint32_t m = 1; m <= 1; m++) {
14814 for (uint32_t n = 1; n <= 4; n++) {
14815 GemmMicrokernelTester()
14816 .mr(1)
14817 .nr(4)
14818 .kr(2)
14819 .sr(1)
14820 .m(m)
14821 .n(n)
14822 .k(k)
14823 .ks(3)
14824 .iterations(1)
14825 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14826 }
14827 }
14828 }
14829 }
14830
14831 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4_small_kernel) {
14832 TEST_REQUIRES_X86_AVX;
14833 for (uint32_t n = 5; n < 8; n++) {
14834 for (size_t k = 1; k <= 40; k += 9) {
14835 GemmMicrokernelTester()
14836 .mr(1)
14837 .nr(4)
14838 .kr(2)
14839 .sr(1)
14840 .m(1)
14841 .n(4)
14842 .k(k)
14843 .ks(3)
14844 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14845 }
14846 }
14847 }
14848
14849 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4_small_kernel) {
14850 TEST_REQUIRES_X86_AVX;
14851 for (uint32_t n = 8; n <= 12; n += 4) {
14852 for (size_t k = 1; k <= 40; k += 9) {
14853 GemmMicrokernelTester()
14854 .mr(1)
14855 .nr(4)
14856 .kr(2)
14857 .sr(1)
14858 .m(1)
14859 .n(4)
14860 .k(k)
14861 .ks(3)
14862 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14863 }
14864 }
14865 }
14866
14867 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, strided_cm_subtile) {
14868 TEST_REQUIRES_X86_AVX;
14869 for (size_t k = 1; k <= 40; k += 9) {
14870 for (uint32_t m = 1; m <= 1; m++) {
14871 for (uint32_t n = 1; n <= 4; n++) {
14872 GemmMicrokernelTester()
14873 .mr(1)
14874 .nr(4)
14875 .kr(2)
14876 .sr(1)
14877 .m(m)
14878 .n(n)
14879 .k(k)
14880 .cm_stride(7)
14881 .iterations(1)
14882 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14883 }
14884 }
14885 }
14886 }
14887
14888 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, a_offset) {
14889 TEST_REQUIRES_X86_AVX;
14890 for (size_t k = 1; k <= 40; k += 9) {
14891 GemmMicrokernelTester()
14892 .mr(1)
14893 .nr(4)
14894 .kr(2)
14895 .sr(1)
14896 .m(1)
14897 .n(4)
14898 .k(k)
14899 .ks(3)
14900 .a_offset(43)
14901 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14902 }
14903 }
14904
14905 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, zero) {
14906 TEST_REQUIRES_X86_AVX;
14907 for (uint32_t mz = 0; mz < 1; mz++) {
14908 for (size_t k = 1; k <= 40; k += 9) {
14909 GemmMicrokernelTester()
14910 .mr(1)
14911 .nr(4)
14912 .kr(2)
14913 .sr(1)
14914 .m(1)
14915 .n(4)
14916 .k(k)
14917 .ks(3)
14918 .a_offset(43)
14919 .zero_index(mz)
14920 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14921 }
14922 }
14923 }
14924
14925 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, qmin) {
14926 TEST_REQUIRES_X86_AVX;
14927 GemmMicrokernelTester()
14928 .mr(1)
14929 .nr(4)
14930 .kr(2)
14931 .sr(1)
14932 .m(1)
14933 .n(4)
14934 .k(8)
14935 .qmin(128)
14936 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14937 }
14938
14939 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, qmax) {
14940 TEST_REQUIRES_X86_AVX;
14941 GemmMicrokernelTester()
14942 .mr(1)
14943 .nr(4)
14944 .kr(2)
14945 .sr(1)
14946 .m(1)
14947 .n(4)
14948 .k(8)
14949 .qmax(128)
14950 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14951 }
14952
14953 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, strided_cm) {
14954 TEST_REQUIRES_X86_AVX;
14955 GemmMicrokernelTester()
14956 .mr(1)
14957 .nr(4)
14958 .kr(2)
14959 .sr(1)
14960 .m(1)
14961 .n(4)
14962 .k(8)
14963 .cm_stride(7)
14964 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14965 }
14966
14967 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, no_a_zero_point) {
14968 TEST_REQUIRES_X86_AVX;
14969 for (size_t k = 1; k <= 40; k += 9) {
14970 GemmMicrokernelTester()
14971 .mr(1)
14972 .nr(4)
14973 .kr(2)
14974 .sr(1)
14975 .m(1)
14976 .n(4)
14977 .k(k)
14978 .a_zero_point(0)
14979 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14980 }
14981 }
14982
14983 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, no_b_zero_point) {
14984 TEST_REQUIRES_X86_AVX;
14985 for (size_t k = 1; k <= 40; k += 9) {
14986 GemmMicrokernelTester()
14987 .mr(1)
14988 .nr(4)
14989 .kr(2)
14990 .sr(1)
14991 .m(1)
14992 .n(4)
14993 .k(k)
14994 .b_zero_point(0)
14995 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
14996 }
14997 }
14998
14999 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, no_zero_point) {
15000 TEST_REQUIRES_X86_AVX;
15001 for (size_t k = 1; k <= 40; k += 9) {
15002 GemmMicrokernelTester()
15003 .mr(1)
15004 .nr(4)
15005 .kr(2)
15006 .sr(1)
15007 .m(1)
15008 .n(4)
15009 .k(k)
15010 .a_zero_point(0)
15011 .b_zero_point(0)
15012 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15013 }
15014 }
15015#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15016
15017
15018#if XNN_ARCH_X86 || XNN_ARCH_X86_64
15019 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8) {
15020 TEST_REQUIRES_X86_AVX;
15021 GemmMicrokernelTester()
15022 .mr(2)
15023 .nr(4)
15024 .kr(2)
15025 .sr(1)
15026 .m(2)
15027 .n(4)
15028 .k(8)
15029 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15030 }
15031
15032 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, strided_cn) {
15033 TEST_REQUIRES_X86_AVX;
15034 GemmMicrokernelTester()
15035 .mr(2)
15036 .nr(4)
15037 .kr(2)
15038 .sr(1)
15039 .m(2)
15040 .n(4)
15041 .k(8)
15042 .cn_stride(7)
15043 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15044 }
15045
15046 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8_subtile) {
15047 TEST_REQUIRES_X86_AVX;
15048 for (uint32_t m = 1; m <= 2; m++) {
15049 for (uint32_t n = 1; n <= 4; n++) {
15050 GemmMicrokernelTester()
15051 .mr(2)
15052 .nr(4)
15053 .kr(2)
15054 .sr(1)
15055 .m(m)
15056 .n(n)
15057 .k(8)
15058 .iterations(1)
15059 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15060 }
15061 }
15062 }
15063
15064 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8_subtile_m) {
15065 TEST_REQUIRES_X86_AVX;
15066 for (uint32_t m = 1; m <= 2; m++) {
15067 GemmMicrokernelTester()
15068 .mr(2)
15069 .nr(4)
15070 .kr(2)
15071 .sr(1)
15072 .m(m)
15073 .n(4)
15074 .k(8)
15075 .iterations(1)
15076 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15077 }
15078 }
15079
15080 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8_subtile_n) {
15081 TEST_REQUIRES_X86_AVX;
15082 for (uint32_t n = 1; n <= 4; n++) {
15083 GemmMicrokernelTester()
15084 .mr(2)
15085 .nr(4)
15086 .kr(2)
15087 .sr(1)
15088 .m(2)
15089 .n(n)
15090 .k(8)
15091 .iterations(1)
15092 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15093 }
15094 }
15095
15096 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_lt_8) {
15097 TEST_REQUIRES_X86_AVX;
15098 for (size_t k = 1; k < 8; k++) {
15099 GemmMicrokernelTester()
15100 .mr(2)
15101 .nr(4)
15102 .kr(2)
15103 .sr(1)
15104 .m(2)
15105 .n(4)
15106 .k(k)
15107 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15108 }
15109 }
15110
15111 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_lt_8_subtile) {
15112 TEST_REQUIRES_X86_AVX;
15113 for (size_t k = 1; k < 8; k++) {
15114 for (uint32_t m = 1; m <= 2; m++) {
15115 for (uint32_t n = 1; n <= 4; n++) {
15116 GemmMicrokernelTester()
15117 .mr(2)
15118 .nr(4)
15119 .kr(2)
15120 .sr(1)
15121 .m(m)
15122 .n(n)
15123 .k(k)
15124 .iterations(1)
15125 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15126 }
15127 }
15128 }
15129 }
15130
15131 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_gt_8) {
15132 TEST_REQUIRES_X86_AVX;
15133 for (size_t k = 9; k < 16; k++) {
15134 GemmMicrokernelTester()
15135 .mr(2)
15136 .nr(4)
15137 .kr(2)
15138 .sr(1)
15139 .m(2)
15140 .n(4)
15141 .k(k)
15142 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15143 }
15144 }
15145
15146 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_gt_8_subtile) {
15147 TEST_REQUIRES_X86_AVX;
15148 for (size_t k = 9; k < 16; k++) {
15149 for (uint32_t m = 1; m <= 2; m++) {
15150 for (uint32_t n = 1; n <= 4; n++) {
15151 GemmMicrokernelTester()
15152 .mr(2)
15153 .nr(4)
15154 .kr(2)
15155 .sr(1)
15156 .m(m)
15157 .n(n)
15158 .k(k)
15159 .iterations(1)
15160 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15161 }
15162 }
15163 }
15164 }
15165
15166 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_div_8) {
15167 TEST_REQUIRES_X86_AVX;
15168 for (size_t k = 16; k <= 80; k += 8) {
15169 GemmMicrokernelTester()
15170 .mr(2)
15171 .nr(4)
15172 .kr(2)
15173 .sr(1)
15174 .m(2)
15175 .n(4)
15176 .k(k)
15177 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15178 }
15179 }
15180
15181 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_div_8_subtile) {
15182 TEST_REQUIRES_X86_AVX;
15183 for (size_t k = 16; k <= 80; k += 8) {
15184 for (uint32_t m = 1; m <= 2; m++) {
15185 for (uint32_t n = 1; n <= 4; n++) {
15186 GemmMicrokernelTester()
15187 .mr(2)
15188 .nr(4)
15189 .kr(2)
15190 .sr(1)
15191 .m(m)
15192 .n(n)
15193 .k(k)
15194 .iterations(1)
15195 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15196 }
15197 }
15198 }
15199 }
15200
15201 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4) {
15202 TEST_REQUIRES_X86_AVX;
15203 for (uint32_t n = 5; n < 8; n++) {
15204 for (size_t k = 1; k <= 40; k += 9) {
15205 GemmMicrokernelTester()
15206 .mr(2)
15207 .nr(4)
15208 .kr(2)
15209 .sr(1)
15210 .m(2)
15211 .n(4)
15212 .k(k)
15213 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15214 }
15215 }
15216 }
15217
15218 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4_strided_cn) {
15219 TEST_REQUIRES_X86_AVX;
15220 for (uint32_t n = 5; n < 8; n++) {
15221 for (size_t k = 1; k <= 40; k += 9) {
15222 GemmMicrokernelTester()
15223 .mr(2)
15224 .nr(4)
15225 .kr(2)
15226 .sr(1)
15227 .m(2)
15228 .n(4)
15229 .k(k)
15230 .cn_stride(7)
15231 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15232 }
15233 }
15234 }
15235
15236 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4_subtile) {
15237 TEST_REQUIRES_X86_AVX;
15238 for (uint32_t n = 5; n < 8; n++) {
15239 for (size_t k = 1; k <= 40; k += 9) {
15240 for (uint32_t m = 1; m <= 2; m++) {
15241 GemmMicrokernelTester()
15242 .mr(2)
15243 .nr(4)
15244 .kr(2)
15245 .sr(1)
15246 .m(m)
15247 .n(n)
15248 .k(k)
15249 .iterations(1)
15250 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15251 }
15252 }
15253 }
15254 }
15255
15256 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4) {
15257 TEST_REQUIRES_X86_AVX;
15258 for (uint32_t n = 8; n <= 12; n += 4) {
15259 for (size_t k = 1; k <= 40; k += 9) {
15260 GemmMicrokernelTester()
15261 .mr(2)
15262 .nr(4)
15263 .kr(2)
15264 .sr(1)
15265 .m(2)
15266 .n(4)
15267 .k(k)
15268 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15269 }
15270 }
15271 }
15272
15273 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4_strided_cn) {
15274 TEST_REQUIRES_X86_AVX;
15275 for (uint32_t n = 8; n <= 12; n += 4) {
15276 for (size_t k = 1; k <= 40; k += 9) {
15277 GemmMicrokernelTester()
15278 .mr(2)
15279 .nr(4)
15280 .kr(2)
15281 .sr(1)
15282 .m(2)
15283 .n(n)
15284 .k(k)
15285 .cn_stride(7)
15286 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15287 }
15288 }
15289 }
15290
15291 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4_subtile) {
15292 TEST_REQUIRES_X86_AVX;
15293 for (uint32_t n = 8; n <= 12; n += 4) {
15294 for (size_t k = 1; k <= 40; k += 9) {
15295 for (uint32_t m = 1; m <= 2; m++) {
15296 GemmMicrokernelTester()
15297 .mr(2)
15298 .nr(4)
15299 .kr(2)
15300 .sr(1)
15301 .m(m)
15302 .n(n)
15303 .k(k)
15304 .iterations(1)
15305 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15306 }
15307 }
15308 }
15309 }
15310
15311 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, small_kernel) {
15312 TEST_REQUIRES_X86_AVX;
15313 for (size_t k = 1; k <= 40; k += 9) {
15314 GemmMicrokernelTester()
15315 .mr(2)
15316 .nr(4)
15317 .kr(2)
15318 .sr(1)
15319 .m(2)
15320 .n(4)
15321 .k(k)
15322 .ks(3)
15323 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15324 }
15325 }
15326
15327 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, small_kernel_subtile) {
15328 TEST_REQUIRES_X86_AVX;
15329 for (size_t k = 1; k <= 40; k += 9) {
15330 for (uint32_t m = 1; m <= 2; m++) {
15331 for (uint32_t n = 1; n <= 4; n++) {
15332 GemmMicrokernelTester()
15333 .mr(2)
15334 .nr(4)
15335 .kr(2)
15336 .sr(1)
15337 .m(m)
15338 .n(n)
15339 .k(k)
15340 .ks(3)
15341 .iterations(1)
15342 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15343 }
15344 }
15345 }
15346 }
15347
15348 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4_small_kernel) {
15349 TEST_REQUIRES_X86_AVX;
15350 for (uint32_t n = 5; n < 8; n++) {
15351 for (size_t k = 1; k <= 40; k += 9) {
15352 GemmMicrokernelTester()
15353 .mr(2)
15354 .nr(4)
15355 .kr(2)
15356 .sr(1)
15357 .m(2)
15358 .n(4)
15359 .k(k)
15360 .ks(3)
15361 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15362 }
15363 }
15364 }
15365
15366 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4_small_kernel) {
15367 TEST_REQUIRES_X86_AVX;
15368 for (uint32_t n = 8; n <= 12; n += 4) {
15369 for (size_t k = 1; k <= 40; k += 9) {
15370 GemmMicrokernelTester()
15371 .mr(2)
15372 .nr(4)
15373 .kr(2)
15374 .sr(1)
15375 .m(2)
15376 .n(4)
15377 .k(k)
15378 .ks(3)
15379 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15380 }
15381 }
15382 }
15383
15384 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, strided_cm_subtile) {
15385 TEST_REQUIRES_X86_AVX;
15386 for (size_t k = 1; k <= 40; k += 9) {
15387 for (uint32_t m = 1; m <= 2; m++) {
15388 for (uint32_t n = 1; n <= 4; n++) {
15389 GemmMicrokernelTester()
15390 .mr(2)
15391 .nr(4)
15392 .kr(2)
15393 .sr(1)
15394 .m(m)
15395 .n(n)
15396 .k(k)
15397 .cm_stride(7)
15398 .iterations(1)
15399 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15400 }
15401 }
15402 }
15403 }
15404
15405 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, a_offset) {
15406 TEST_REQUIRES_X86_AVX;
15407 for (size_t k = 1; k <= 40; k += 9) {
15408 GemmMicrokernelTester()
15409 .mr(2)
15410 .nr(4)
15411 .kr(2)
15412 .sr(1)
15413 .m(2)
15414 .n(4)
15415 .k(k)
15416 .ks(3)
15417 .a_offset(83)
15418 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15419 }
15420 }
15421
15422 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, zero) {
15423 TEST_REQUIRES_X86_AVX;
15424 for (uint32_t mz = 0; mz < 2; mz++) {
15425 for (size_t k = 1; k <= 40; k += 9) {
15426 GemmMicrokernelTester()
15427 .mr(2)
15428 .nr(4)
15429 .kr(2)
15430 .sr(1)
15431 .m(2)
15432 .n(4)
15433 .k(k)
15434 .ks(3)
15435 .a_offset(83)
15436 .zero_index(mz)
15437 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15438 }
15439 }
15440 }
15441
15442 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, qmin) {
15443 TEST_REQUIRES_X86_AVX;
15444 GemmMicrokernelTester()
15445 .mr(2)
15446 .nr(4)
15447 .kr(2)
15448 .sr(1)
15449 .m(2)
15450 .n(4)
15451 .k(8)
15452 .qmin(128)
15453 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15454 }
15455
15456 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, qmax) {
15457 TEST_REQUIRES_X86_AVX;
15458 GemmMicrokernelTester()
15459 .mr(2)
15460 .nr(4)
15461 .kr(2)
15462 .sr(1)
15463 .m(2)
15464 .n(4)
15465 .k(8)
15466 .qmax(128)
15467 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15468 }
15469
15470 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, strided_cm) {
15471 TEST_REQUIRES_X86_AVX;
15472 GemmMicrokernelTester()
15473 .mr(2)
15474 .nr(4)
15475 .kr(2)
15476 .sr(1)
15477 .m(2)
15478 .n(4)
15479 .k(8)
15480 .cm_stride(7)
15481 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15482 }
15483
15484 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, no_a_zero_point) {
15485 TEST_REQUIRES_X86_AVX;
15486 for (size_t k = 1; k <= 40; k += 9) {
15487 GemmMicrokernelTester()
15488 .mr(2)
15489 .nr(4)
15490 .kr(2)
15491 .sr(1)
15492 .m(2)
15493 .n(4)
15494 .k(k)
15495 .a_zero_point(0)
15496 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15497 }
15498 }
15499
15500 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, no_b_zero_point) {
15501 TEST_REQUIRES_X86_AVX;
15502 for (size_t k = 1; k <= 40; k += 9) {
15503 GemmMicrokernelTester()
15504 .mr(2)
15505 .nr(4)
15506 .kr(2)
15507 .sr(1)
15508 .m(2)
15509 .n(4)
15510 .k(k)
15511 .b_zero_point(0)
15512 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15513 }
15514 }
15515
15516 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, no_zero_point) {
15517 TEST_REQUIRES_X86_AVX;
15518 for (size_t k = 1; k <= 40; k += 9) {
15519 GemmMicrokernelTester()
15520 .mr(2)
15521 .nr(4)
15522 .kr(2)
15523 .sr(1)
15524 .m(2)
15525 .n(4)
15526 .k(k)
15527 .a_zero_point(0)
15528 .b_zero_point(0)
15529 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15530 }
15531 }
15532#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15533
15534
15535#if XNN_ARCH_X86 || XNN_ARCH_X86_64
15536 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8) {
15537 TEST_REQUIRES_X86_AVX;
15538 GemmMicrokernelTester()
15539 .mr(3)
15540 .nr(4)
15541 .kr(2)
15542 .sr(1)
15543 .m(3)
15544 .n(4)
15545 .k(8)
15546 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15547 }
15548
15549 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cn) {
15550 TEST_REQUIRES_X86_AVX;
15551 GemmMicrokernelTester()
15552 .mr(3)
15553 .nr(4)
15554 .kr(2)
15555 .sr(1)
15556 .m(3)
15557 .n(4)
15558 .k(8)
15559 .cn_stride(7)
15560 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15561 }
15562
15563 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile) {
15564 TEST_REQUIRES_X86_AVX;
15565 for (uint32_t m = 1; m <= 3; m++) {
15566 for (uint32_t n = 1; n <= 4; n++) {
15567 GemmMicrokernelTester()
15568 .mr(3)
15569 .nr(4)
15570 .kr(2)
15571 .sr(1)
15572 .m(m)
15573 .n(n)
15574 .k(8)
15575 .iterations(1)
15576 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15577 }
15578 }
15579 }
15580
15581 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile_m) {
15582 TEST_REQUIRES_X86_AVX;
15583 for (uint32_t m = 1; m <= 3; m++) {
15584 GemmMicrokernelTester()
15585 .mr(3)
15586 .nr(4)
15587 .kr(2)
15588 .sr(1)
15589 .m(m)
15590 .n(4)
15591 .k(8)
15592 .iterations(1)
15593 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15594 }
15595 }
15596
15597 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile_n) {
15598 TEST_REQUIRES_X86_AVX;
15599 for (uint32_t n = 1; n <= 4; n++) {
15600 GemmMicrokernelTester()
15601 .mr(3)
15602 .nr(4)
15603 .kr(2)
15604 .sr(1)
15605 .m(3)
15606 .n(n)
15607 .k(8)
15608 .iterations(1)
15609 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15610 }
15611 }
15612
15613 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_lt_8) {
15614 TEST_REQUIRES_X86_AVX;
15615 for (size_t k = 1; k < 8; k++) {
15616 GemmMicrokernelTester()
15617 .mr(3)
15618 .nr(4)
15619 .kr(2)
15620 .sr(1)
15621 .m(3)
15622 .n(4)
15623 .k(k)
15624 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15625 }
15626 }
15627
15628 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_lt_8_subtile) {
15629 TEST_REQUIRES_X86_AVX;
15630 for (size_t k = 1; k < 8; k++) {
15631 for (uint32_t m = 1; m <= 3; m++) {
15632 for (uint32_t n = 1; n <= 4; n++) {
15633 GemmMicrokernelTester()
15634 .mr(3)
15635 .nr(4)
15636 .kr(2)
15637 .sr(1)
15638 .m(m)
15639 .n(n)
15640 .k(k)
15641 .iterations(1)
15642 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15643 }
15644 }
15645 }
15646 }
15647
15648 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_gt_8) {
15649 TEST_REQUIRES_X86_AVX;
15650 for (size_t k = 9; k < 16; k++) {
15651 GemmMicrokernelTester()
15652 .mr(3)
15653 .nr(4)
15654 .kr(2)
15655 .sr(1)
15656 .m(3)
15657 .n(4)
15658 .k(k)
15659 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15660 }
15661 }
15662
15663 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_gt_8_subtile) {
15664 TEST_REQUIRES_X86_AVX;
15665 for (size_t k = 9; k < 16; k++) {
15666 for (uint32_t m = 1; m <= 3; m++) {
15667 for (uint32_t n = 1; n <= 4; n++) {
15668 GemmMicrokernelTester()
15669 .mr(3)
15670 .nr(4)
15671 .kr(2)
15672 .sr(1)
15673 .m(m)
15674 .n(n)
15675 .k(k)
15676 .iterations(1)
15677 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15678 }
15679 }
15680 }
15681 }
15682
15683 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_div_8) {
15684 TEST_REQUIRES_X86_AVX;
15685 for (size_t k = 16; k <= 80; k += 8) {
15686 GemmMicrokernelTester()
15687 .mr(3)
15688 .nr(4)
15689 .kr(2)
15690 .sr(1)
15691 .m(3)
15692 .n(4)
15693 .k(k)
15694 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15695 }
15696 }
15697
15698 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_div_8_subtile) {
15699 TEST_REQUIRES_X86_AVX;
15700 for (size_t k = 16; k <= 80; k += 8) {
15701 for (uint32_t m = 1; m <= 3; m++) {
15702 for (uint32_t n = 1; n <= 4; n++) {
15703 GemmMicrokernelTester()
15704 .mr(3)
15705 .nr(4)
15706 .kr(2)
15707 .sr(1)
15708 .m(m)
15709 .n(n)
15710 .k(k)
15711 .iterations(1)
15712 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15713 }
15714 }
15715 }
15716 }
15717
15718 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4) {
15719 TEST_REQUIRES_X86_AVX;
15720 for (uint32_t n = 5; n < 8; n++) {
15721 for (size_t k = 1; k <= 40; k += 9) {
15722 GemmMicrokernelTester()
15723 .mr(3)
15724 .nr(4)
15725 .kr(2)
15726 .sr(1)
15727 .m(3)
15728 .n(4)
15729 .k(k)
15730 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15731 }
15732 }
15733 }
15734
15735 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_strided_cn) {
15736 TEST_REQUIRES_X86_AVX;
15737 for (uint32_t n = 5; n < 8; n++) {
15738 for (size_t k = 1; k <= 40; k += 9) {
15739 GemmMicrokernelTester()
15740 .mr(3)
15741 .nr(4)
15742 .kr(2)
15743 .sr(1)
15744 .m(3)
15745 .n(4)
15746 .k(k)
15747 .cn_stride(7)
15748 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15749 }
15750 }
15751 }
15752
15753 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_subtile) {
15754 TEST_REQUIRES_X86_AVX;
15755 for (uint32_t n = 5; n < 8; n++) {
15756 for (size_t k = 1; k <= 40; k += 9) {
15757 for (uint32_t m = 1; m <= 3; m++) {
15758 GemmMicrokernelTester()
15759 .mr(3)
15760 .nr(4)
15761 .kr(2)
15762 .sr(1)
15763 .m(m)
15764 .n(n)
15765 .k(k)
15766 .iterations(1)
15767 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15768 }
15769 }
15770 }
15771 }
15772
15773 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4) {
15774 TEST_REQUIRES_X86_AVX;
15775 for (uint32_t n = 8; n <= 12; n += 4) {
15776 for (size_t k = 1; k <= 40; k += 9) {
15777 GemmMicrokernelTester()
15778 .mr(3)
15779 .nr(4)
15780 .kr(2)
15781 .sr(1)
15782 .m(3)
15783 .n(4)
15784 .k(k)
15785 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15786 }
15787 }
15788 }
15789
15790 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_strided_cn) {
15791 TEST_REQUIRES_X86_AVX;
15792 for (uint32_t n = 8; n <= 12; n += 4) {
15793 for (size_t k = 1; k <= 40; k += 9) {
15794 GemmMicrokernelTester()
15795 .mr(3)
15796 .nr(4)
15797 .kr(2)
15798 .sr(1)
15799 .m(3)
15800 .n(n)
15801 .k(k)
15802 .cn_stride(7)
15803 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15804 }
15805 }
15806 }
15807
15808 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_subtile) {
15809 TEST_REQUIRES_X86_AVX;
15810 for (uint32_t n = 8; n <= 12; n += 4) {
15811 for (size_t k = 1; k <= 40; k += 9) {
15812 for (uint32_t m = 1; m <= 3; m++) {
15813 GemmMicrokernelTester()
15814 .mr(3)
15815 .nr(4)
15816 .kr(2)
15817 .sr(1)
15818 .m(m)
15819 .n(n)
15820 .k(k)
15821 .iterations(1)
15822 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15823 }
15824 }
15825 }
15826 }
15827
15828 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, small_kernel) {
15829 TEST_REQUIRES_X86_AVX;
15830 for (size_t k = 1; k <= 40; k += 9) {
15831 GemmMicrokernelTester()
15832 .mr(3)
15833 .nr(4)
15834 .kr(2)
15835 .sr(1)
15836 .m(3)
15837 .n(4)
15838 .k(k)
15839 .ks(3)
15840 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15841 }
15842 }
15843
15844 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, small_kernel_subtile) {
15845 TEST_REQUIRES_X86_AVX;
15846 for (size_t k = 1; k <= 40; k += 9) {
15847 for (uint32_t m = 1; m <= 3; m++) {
15848 for (uint32_t n = 1; n <= 4; n++) {
15849 GemmMicrokernelTester()
15850 .mr(3)
15851 .nr(4)
15852 .kr(2)
15853 .sr(1)
15854 .m(m)
15855 .n(n)
15856 .k(k)
15857 .ks(3)
15858 .iterations(1)
15859 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15860 }
15861 }
15862 }
15863 }
15864
15865 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_small_kernel) {
15866 TEST_REQUIRES_X86_AVX;
15867 for (uint32_t n = 5; n < 8; n++) {
15868 for (size_t k = 1; k <= 40; k += 9) {
15869 GemmMicrokernelTester()
15870 .mr(3)
15871 .nr(4)
15872 .kr(2)
15873 .sr(1)
15874 .m(3)
15875 .n(4)
15876 .k(k)
15877 .ks(3)
15878 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15879 }
15880 }
15881 }
15882
15883 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_small_kernel) {
15884 TEST_REQUIRES_X86_AVX;
15885 for (uint32_t n = 8; n <= 12; n += 4) {
15886 for (size_t k = 1; k <= 40; k += 9) {
15887 GemmMicrokernelTester()
15888 .mr(3)
15889 .nr(4)
15890 .kr(2)
15891 .sr(1)
15892 .m(3)
15893 .n(4)
15894 .k(k)
15895 .ks(3)
15896 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15897 }
15898 }
15899 }
15900
15901 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cm_subtile) {
15902 TEST_REQUIRES_X86_AVX;
15903 for (size_t k = 1; k <= 40; k += 9) {
15904 for (uint32_t m = 1; m <= 3; m++) {
15905 for (uint32_t n = 1; n <= 4; n++) {
15906 GemmMicrokernelTester()
15907 .mr(3)
15908 .nr(4)
15909 .kr(2)
15910 .sr(1)
15911 .m(m)
15912 .n(n)
15913 .k(k)
15914 .cm_stride(7)
15915 .iterations(1)
15916 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15917 }
15918 }
15919 }
15920 }
15921
15922 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, a_offset) {
15923 TEST_REQUIRES_X86_AVX;
15924 for (size_t k = 1; k <= 40; k += 9) {
15925 GemmMicrokernelTester()
15926 .mr(3)
15927 .nr(4)
15928 .kr(2)
15929 .sr(1)
15930 .m(3)
15931 .n(4)
15932 .k(k)
15933 .ks(3)
15934 .a_offset(127)
15935 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15936 }
15937 }
15938
15939 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, zero) {
15940 TEST_REQUIRES_X86_AVX;
15941 for (uint32_t mz = 0; mz < 3; mz++) {
15942 for (size_t k = 1; k <= 40; k += 9) {
15943 GemmMicrokernelTester()
15944 .mr(3)
15945 .nr(4)
15946 .kr(2)
15947 .sr(1)
15948 .m(3)
15949 .n(4)
15950 .k(k)
15951 .ks(3)
15952 .a_offset(127)
15953 .zero_index(mz)
15954 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15955 }
15956 }
15957 }
15958
15959 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, qmin) {
15960 TEST_REQUIRES_X86_AVX;
15961 GemmMicrokernelTester()
15962 .mr(3)
15963 .nr(4)
15964 .kr(2)
15965 .sr(1)
15966 .m(3)
15967 .n(4)
15968 .k(8)
15969 .qmin(128)
15970 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15971 }
15972
15973 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, qmax) {
15974 TEST_REQUIRES_X86_AVX;
15975 GemmMicrokernelTester()
15976 .mr(3)
15977 .nr(4)
15978 .kr(2)
15979 .sr(1)
15980 .m(3)
15981 .n(4)
15982 .k(8)
15983 .qmax(128)
15984 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15985 }
15986
15987 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cm) {
15988 TEST_REQUIRES_X86_AVX;
15989 GemmMicrokernelTester()
15990 .mr(3)
15991 .nr(4)
15992 .kr(2)
15993 .sr(1)
15994 .m(3)
15995 .n(4)
15996 .k(8)
15997 .cm_stride(7)
15998 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
15999 }
16000
16001 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, no_a_zero_point) {
16002 TEST_REQUIRES_X86_AVX;
16003 for (size_t k = 1; k <= 40; k += 9) {
16004 GemmMicrokernelTester()
16005 .mr(3)
16006 .nr(4)
16007 .kr(2)
16008 .sr(1)
16009 .m(3)
16010 .n(4)
16011 .k(k)
16012 .a_zero_point(0)
16013 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16014 }
16015 }
16016
16017 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, no_b_zero_point) {
16018 TEST_REQUIRES_X86_AVX;
16019 for (size_t k = 1; k <= 40; k += 9) {
16020 GemmMicrokernelTester()
16021 .mr(3)
16022 .nr(4)
16023 .kr(2)
16024 .sr(1)
16025 .m(3)
16026 .n(4)
16027 .k(k)
16028 .b_zero_point(0)
16029 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16030 }
16031 }
16032
16033 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, no_zero_point) {
16034 TEST_REQUIRES_X86_AVX;
16035 for (size_t k = 1; k <= 40; k += 9) {
16036 GemmMicrokernelTester()
16037 .mr(3)
16038 .nr(4)
16039 .kr(2)
16040 .sr(1)
16041 .m(3)
16042 .n(4)
16043 .k(k)
16044 .a_zero_point(0)
16045 .b_zero_point(0)
16046 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16047 }
16048 }
16049#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16050
16051
16052#if XNN_ARCH_X86 || XNN_ARCH_X86_64
16053 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8) {
16054 TEST_REQUIRES_X86_AVX;
16055 GemmMicrokernelTester()
16056 .mr(4)
16057 .nr(4)
16058 .kr(2)
16059 .sr(1)
16060 .m(4)
16061 .n(4)
16062 .k(8)
16063 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16064 }
16065
16066 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, strided_cn) {
16067 TEST_REQUIRES_X86_AVX;
16068 GemmMicrokernelTester()
16069 .mr(4)
16070 .nr(4)
16071 .kr(2)
16072 .sr(1)
16073 .m(4)
16074 .n(4)
16075 .k(8)
16076 .cn_stride(7)
16077 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16078 }
16079
16080 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8_subtile) {
16081 TEST_REQUIRES_X86_AVX;
16082 for (uint32_t m = 1; m <= 4; m++) {
16083 for (uint32_t n = 1; n <= 4; n++) {
16084 GemmMicrokernelTester()
16085 .mr(4)
16086 .nr(4)
16087 .kr(2)
16088 .sr(1)
16089 .m(m)
16090 .n(n)
16091 .k(8)
16092 .iterations(1)
16093 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16094 }
16095 }
16096 }
16097
16098 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8_subtile_m) {
16099 TEST_REQUIRES_X86_AVX;
16100 for (uint32_t m = 1; m <= 4; m++) {
16101 GemmMicrokernelTester()
16102 .mr(4)
16103 .nr(4)
16104 .kr(2)
16105 .sr(1)
16106 .m(m)
16107 .n(4)
16108 .k(8)
16109 .iterations(1)
16110 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16111 }
16112 }
16113
16114 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8_subtile_n) {
16115 TEST_REQUIRES_X86_AVX;
16116 for (uint32_t n = 1; n <= 4; n++) {
16117 GemmMicrokernelTester()
16118 .mr(4)
16119 .nr(4)
16120 .kr(2)
16121 .sr(1)
16122 .m(4)
16123 .n(n)
16124 .k(8)
16125 .iterations(1)
16126 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16127 }
16128 }
16129
16130 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_lt_8) {
16131 TEST_REQUIRES_X86_AVX;
16132 for (size_t k = 1; k < 8; k++) {
16133 GemmMicrokernelTester()
16134 .mr(4)
16135 .nr(4)
16136 .kr(2)
16137 .sr(1)
16138 .m(4)
16139 .n(4)
16140 .k(k)
16141 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16142 }
16143 }
16144
16145 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_lt_8_subtile) {
16146 TEST_REQUIRES_X86_AVX;
16147 for (size_t k = 1; k < 8; k++) {
16148 for (uint32_t m = 1; m <= 4; m++) {
16149 for (uint32_t n = 1; n <= 4; n++) {
16150 GemmMicrokernelTester()
16151 .mr(4)
16152 .nr(4)
16153 .kr(2)
16154 .sr(1)
16155 .m(m)
16156 .n(n)
16157 .k(k)
16158 .iterations(1)
16159 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16160 }
16161 }
16162 }
16163 }
16164
16165 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_gt_8) {
16166 TEST_REQUIRES_X86_AVX;
16167 for (size_t k = 9; k < 16; k++) {
16168 GemmMicrokernelTester()
16169 .mr(4)
16170 .nr(4)
16171 .kr(2)
16172 .sr(1)
16173 .m(4)
16174 .n(4)
16175 .k(k)
16176 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16177 }
16178 }
16179
16180 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_gt_8_subtile) {
16181 TEST_REQUIRES_X86_AVX;
16182 for (size_t k = 9; k < 16; k++) {
16183 for (uint32_t m = 1; m <= 4; m++) {
16184 for (uint32_t n = 1; n <= 4; n++) {
16185 GemmMicrokernelTester()
16186 .mr(4)
16187 .nr(4)
16188 .kr(2)
16189 .sr(1)
16190 .m(m)
16191 .n(n)
16192 .k(k)
16193 .iterations(1)
16194 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16195 }
16196 }
16197 }
16198 }
16199
16200 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_div_8) {
16201 TEST_REQUIRES_X86_AVX;
16202 for (size_t k = 16; k <= 80; k += 8) {
16203 GemmMicrokernelTester()
16204 .mr(4)
16205 .nr(4)
16206 .kr(2)
16207 .sr(1)
16208 .m(4)
16209 .n(4)
16210 .k(k)
16211 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16212 }
16213 }
16214
16215 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_div_8_subtile) {
16216 TEST_REQUIRES_X86_AVX;
16217 for (size_t k = 16; k <= 80; k += 8) {
16218 for (uint32_t m = 1; m <= 4; m++) {
16219 for (uint32_t n = 1; n <= 4; n++) {
16220 GemmMicrokernelTester()
16221 .mr(4)
16222 .nr(4)
16223 .kr(2)
16224 .sr(1)
16225 .m(m)
16226 .n(n)
16227 .k(k)
16228 .iterations(1)
16229 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16230 }
16231 }
16232 }
16233 }
16234
16235 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4) {
16236 TEST_REQUIRES_X86_AVX;
16237 for (uint32_t n = 5; n < 8; n++) {
16238 for (size_t k = 1; k <= 40; k += 9) {
16239 GemmMicrokernelTester()
16240 .mr(4)
16241 .nr(4)
16242 .kr(2)
16243 .sr(1)
16244 .m(4)
16245 .n(4)
16246 .k(k)
16247 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16248 }
16249 }
16250 }
16251
16252 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4_strided_cn) {
16253 TEST_REQUIRES_X86_AVX;
16254 for (uint32_t n = 5; n < 8; n++) {
16255 for (size_t k = 1; k <= 40; k += 9) {
16256 GemmMicrokernelTester()
16257 .mr(4)
16258 .nr(4)
16259 .kr(2)
16260 .sr(1)
16261 .m(4)
16262 .n(4)
16263 .k(k)
16264 .cn_stride(7)
16265 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16266 }
16267 }
16268 }
16269
16270 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4_subtile) {
16271 TEST_REQUIRES_X86_AVX;
16272 for (uint32_t n = 5; n < 8; n++) {
16273 for (size_t k = 1; k <= 40; k += 9) {
16274 for (uint32_t m = 1; m <= 4; m++) {
16275 GemmMicrokernelTester()
16276 .mr(4)
16277 .nr(4)
16278 .kr(2)
16279 .sr(1)
16280 .m(m)
16281 .n(n)
16282 .k(k)
16283 .iterations(1)
16284 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16285 }
16286 }
16287 }
16288 }
16289
16290 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4) {
16291 TEST_REQUIRES_X86_AVX;
16292 for (uint32_t n = 8; n <= 12; n += 4) {
16293 for (size_t k = 1; k <= 40; k += 9) {
16294 GemmMicrokernelTester()
16295 .mr(4)
16296 .nr(4)
16297 .kr(2)
16298 .sr(1)
16299 .m(4)
16300 .n(4)
16301 .k(k)
16302 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16303 }
16304 }
16305 }
16306
16307 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4_strided_cn) {
16308 TEST_REQUIRES_X86_AVX;
16309 for (uint32_t n = 8; n <= 12; n += 4) {
16310 for (size_t k = 1; k <= 40; k += 9) {
16311 GemmMicrokernelTester()
16312 .mr(4)
16313 .nr(4)
16314 .kr(2)
16315 .sr(1)
16316 .m(4)
16317 .n(n)
16318 .k(k)
16319 .cn_stride(7)
16320 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16321 }
16322 }
16323 }
16324
16325 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4_subtile) {
16326 TEST_REQUIRES_X86_AVX;
16327 for (uint32_t n = 8; n <= 12; n += 4) {
16328 for (size_t k = 1; k <= 40; k += 9) {
16329 for (uint32_t m = 1; m <= 4; m++) {
16330 GemmMicrokernelTester()
16331 .mr(4)
16332 .nr(4)
16333 .kr(2)
16334 .sr(1)
16335 .m(m)
16336 .n(n)
16337 .k(k)
16338 .iterations(1)
16339 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16340 }
16341 }
16342 }
16343 }
16344
16345 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, small_kernel) {
16346 TEST_REQUIRES_X86_AVX;
16347 for (size_t k = 1; k <= 40; k += 9) {
16348 GemmMicrokernelTester()
16349 .mr(4)
16350 .nr(4)
16351 .kr(2)
16352 .sr(1)
16353 .m(4)
16354 .n(4)
16355 .k(k)
16356 .ks(3)
16357 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16358 }
16359 }
16360
16361 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, small_kernel_subtile) {
16362 TEST_REQUIRES_X86_AVX;
16363 for (size_t k = 1; k <= 40; k += 9) {
16364 for (uint32_t m = 1; m <= 4; m++) {
16365 for (uint32_t n = 1; n <= 4; n++) {
16366 GemmMicrokernelTester()
16367 .mr(4)
16368 .nr(4)
16369 .kr(2)
16370 .sr(1)
16371 .m(m)
16372 .n(n)
16373 .k(k)
16374 .ks(3)
16375 .iterations(1)
16376 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16377 }
16378 }
16379 }
16380 }
16381
16382 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4_small_kernel) {
16383 TEST_REQUIRES_X86_AVX;
16384 for (uint32_t n = 5; n < 8; n++) {
16385 for (size_t k = 1; k <= 40; k += 9) {
16386 GemmMicrokernelTester()
16387 .mr(4)
16388 .nr(4)
16389 .kr(2)
16390 .sr(1)
16391 .m(4)
16392 .n(4)
16393 .k(k)
16394 .ks(3)
16395 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16396 }
16397 }
16398 }
16399
16400 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4_small_kernel) {
16401 TEST_REQUIRES_X86_AVX;
16402 for (uint32_t n = 8; n <= 12; n += 4) {
16403 for (size_t k = 1; k <= 40; k += 9) {
16404 GemmMicrokernelTester()
16405 .mr(4)
16406 .nr(4)
16407 .kr(2)
16408 .sr(1)
16409 .m(4)
16410 .n(4)
16411 .k(k)
16412 .ks(3)
16413 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16414 }
16415 }
16416 }
16417
16418 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, strided_cm_subtile) {
16419 TEST_REQUIRES_X86_AVX;
16420 for (size_t k = 1; k <= 40; k += 9) {
16421 for (uint32_t m = 1; m <= 4; m++) {
16422 for (uint32_t n = 1; n <= 4; n++) {
16423 GemmMicrokernelTester()
16424 .mr(4)
16425 .nr(4)
16426 .kr(2)
16427 .sr(1)
16428 .m(m)
16429 .n(n)
16430 .k(k)
16431 .cm_stride(7)
16432 .iterations(1)
16433 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16434 }
16435 }
16436 }
16437 }
16438
16439 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, a_offset) {
16440 TEST_REQUIRES_X86_AVX;
16441 for (size_t k = 1; k <= 40; k += 9) {
16442 GemmMicrokernelTester()
16443 .mr(4)
16444 .nr(4)
16445 .kr(2)
16446 .sr(1)
16447 .m(4)
16448 .n(4)
16449 .k(k)
16450 .ks(3)
16451 .a_offset(163)
16452 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16453 }
16454 }
16455
16456 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, zero) {
16457 TEST_REQUIRES_X86_AVX;
16458 for (uint32_t mz = 0; mz < 4; mz++) {
16459 for (size_t k = 1; k <= 40; k += 9) {
16460 GemmMicrokernelTester()
16461 .mr(4)
16462 .nr(4)
16463 .kr(2)
16464 .sr(1)
16465 .m(4)
16466 .n(4)
16467 .k(k)
16468 .ks(3)
16469 .a_offset(163)
16470 .zero_index(mz)
16471 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16472 }
16473 }
16474 }
16475
16476 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, qmin) {
16477 TEST_REQUIRES_X86_AVX;
16478 GemmMicrokernelTester()
16479 .mr(4)
16480 .nr(4)
16481 .kr(2)
16482 .sr(1)
16483 .m(4)
16484 .n(4)
16485 .k(8)
16486 .qmin(128)
16487 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16488 }
16489
16490 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, qmax) {
16491 TEST_REQUIRES_X86_AVX;
16492 GemmMicrokernelTester()
16493 .mr(4)
16494 .nr(4)
16495 .kr(2)
16496 .sr(1)
16497 .m(4)
16498 .n(4)
16499 .k(8)
16500 .qmax(128)
16501 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16502 }
16503
16504 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, strided_cm) {
16505 TEST_REQUIRES_X86_AVX;
16506 GemmMicrokernelTester()
16507 .mr(4)
16508 .nr(4)
16509 .kr(2)
16510 .sr(1)
16511 .m(4)
16512 .n(4)
16513 .k(8)
16514 .cm_stride(7)
16515 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16516 }
16517
16518 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, no_a_zero_point) {
16519 TEST_REQUIRES_X86_AVX;
16520 for (size_t k = 1; k <= 40; k += 9) {
16521 GemmMicrokernelTester()
16522 .mr(4)
16523 .nr(4)
16524 .kr(2)
16525 .sr(1)
16526 .m(4)
16527 .n(4)
16528 .k(k)
16529 .a_zero_point(0)
16530 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16531 }
16532 }
16533
16534 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, no_b_zero_point) {
16535 TEST_REQUIRES_X86_AVX;
16536 for (size_t k = 1; k <= 40; k += 9) {
16537 GemmMicrokernelTester()
16538 .mr(4)
16539 .nr(4)
16540 .kr(2)
16541 .sr(1)
16542 .m(4)
16543 .n(4)
16544 .k(k)
16545 .b_zero_point(0)
16546 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16547 }
16548 }
16549
16550 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, no_zero_point) {
16551 TEST_REQUIRES_X86_AVX;
16552 for (size_t k = 1; k <= 40; k += 9) {
16553 GemmMicrokernelTester()
16554 .mr(4)
16555 .nr(4)
16556 .kr(2)
16557 .sr(1)
16558 .m(4)
16559 .n(4)
16560 .k(k)
16561 .a_zero_point(0)
16562 .b_zero_point(0)
16563 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16564 }
16565 }
16566#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16567
16568
16569#if XNN_ARCH_X86 || XNN_ARCH_X86_64
16570 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8) {
16571 TEST_REQUIRES_X86_XOP;
16572 GemmMicrokernelTester()
16573 .mr(1)
16574 .nr(4)
16575 .kr(2)
16576 .sr(1)
16577 .m(1)
16578 .n(4)
16579 .k(8)
16580 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16581 }
16582
16583 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cn) {
16584 TEST_REQUIRES_X86_XOP;
16585 GemmMicrokernelTester()
16586 .mr(1)
16587 .nr(4)
16588 .kr(2)
16589 .sr(1)
16590 .m(1)
16591 .n(4)
16592 .k(8)
16593 .cn_stride(7)
16594 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16595 }
16596
16597 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile) {
16598 TEST_REQUIRES_X86_XOP;
16599 for (uint32_t m = 1; m <= 1; m++) {
16600 for (uint32_t n = 1; n <= 4; n++) {
16601 GemmMicrokernelTester()
16602 .mr(1)
16603 .nr(4)
16604 .kr(2)
16605 .sr(1)
16606 .m(m)
16607 .n(n)
16608 .k(8)
16609 .iterations(1)
16610 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16611 }
16612 }
16613 }
16614
16615 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile_m) {
16616 TEST_REQUIRES_X86_XOP;
16617 for (uint32_t m = 1; m <= 1; m++) {
16618 GemmMicrokernelTester()
16619 .mr(1)
16620 .nr(4)
16621 .kr(2)
16622 .sr(1)
16623 .m(m)
16624 .n(4)
16625 .k(8)
16626 .iterations(1)
16627 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16628 }
16629 }
16630
16631 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile_n) {
16632 TEST_REQUIRES_X86_XOP;
16633 for (uint32_t n = 1; n <= 4; n++) {
16634 GemmMicrokernelTester()
16635 .mr(1)
16636 .nr(4)
16637 .kr(2)
16638 .sr(1)
16639 .m(1)
16640 .n(n)
16641 .k(8)
16642 .iterations(1)
16643 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16644 }
16645 }
16646
16647 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_lt_8) {
16648 TEST_REQUIRES_X86_XOP;
16649 for (size_t k = 1; k < 8; k++) {
16650 GemmMicrokernelTester()
16651 .mr(1)
16652 .nr(4)
16653 .kr(2)
16654 .sr(1)
16655 .m(1)
16656 .n(4)
16657 .k(k)
16658 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16659 }
16660 }
16661
16662 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_lt_8_subtile) {
16663 TEST_REQUIRES_X86_XOP;
16664 for (size_t k = 1; k < 8; k++) {
16665 for (uint32_t m = 1; m <= 1; m++) {
16666 for (uint32_t n = 1; n <= 4; n++) {
16667 GemmMicrokernelTester()
16668 .mr(1)
16669 .nr(4)
16670 .kr(2)
16671 .sr(1)
16672 .m(m)
16673 .n(n)
16674 .k(k)
16675 .iterations(1)
16676 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16677 }
16678 }
16679 }
16680 }
16681
16682 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_gt_8) {
16683 TEST_REQUIRES_X86_XOP;
16684 for (size_t k = 9; k < 16; k++) {
16685 GemmMicrokernelTester()
16686 .mr(1)
16687 .nr(4)
16688 .kr(2)
16689 .sr(1)
16690 .m(1)
16691 .n(4)
16692 .k(k)
16693 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16694 }
16695 }
16696
16697 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_gt_8_subtile) {
16698 TEST_REQUIRES_X86_XOP;
16699 for (size_t k = 9; k < 16; k++) {
16700 for (uint32_t m = 1; m <= 1; m++) {
16701 for (uint32_t n = 1; n <= 4; n++) {
16702 GemmMicrokernelTester()
16703 .mr(1)
16704 .nr(4)
16705 .kr(2)
16706 .sr(1)
16707 .m(m)
16708 .n(n)
16709 .k(k)
16710 .iterations(1)
16711 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16712 }
16713 }
16714 }
16715 }
16716
16717 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_div_8) {
16718 TEST_REQUIRES_X86_XOP;
16719 for (size_t k = 16; k <= 80; k += 8) {
16720 GemmMicrokernelTester()
16721 .mr(1)
16722 .nr(4)
16723 .kr(2)
16724 .sr(1)
16725 .m(1)
16726 .n(4)
16727 .k(k)
16728 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16729 }
16730 }
16731
16732 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_div_8_subtile) {
16733 TEST_REQUIRES_X86_XOP;
16734 for (size_t k = 16; k <= 80; k += 8) {
16735 for (uint32_t m = 1; m <= 1; m++) {
16736 for (uint32_t n = 1; n <= 4; n++) {
16737 GemmMicrokernelTester()
16738 .mr(1)
16739 .nr(4)
16740 .kr(2)
16741 .sr(1)
16742 .m(m)
16743 .n(n)
16744 .k(k)
16745 .iterations(1)
16746 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16747 }
16748 }
16749 }
16750 }
16751
16752 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4) {
16753 TEST_REQUIRES_X86_XOP;
16754 for (uint32_t n = 5; n < 8; n++) {
16755 for (size_t k = 1; k <= 40; k += 9) {
16756 GemmMicrokernelTester()
16757 .mr(1)
16758 .nr(4)
16759 .kr(2)
16760 .sr(1)
16761 .m(1)
16762 .n(4)
16763 .k(k)
16764 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16765 }
16766 }
16767 }
16768
16769 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_strided_cn) {
16770 TEST_REQUIRES_X86_XOP;
16771 for (uint32_t n = 5; n < 8; n++) {
16772 for (size_t k = 1; k <= 40; k += 9) {
16773 GemmMicrokernelTester()
16774 .mr(1)
16775 .nr(4)
16776 .kr(2)
16777 .sr(1)
16778 .m(1)
16779 .n(4)
16780 .k(k)
16781 .cn_stride(7)
16782 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16783 }
16784 }
16785 }
16786
16787 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_subtile) {
16788 TEST_REQUIRES_X86_XOP;
16789 for (uint32_t n = 5; n < 8; n++) {
16790 for (size_t k = 1; k <= 40; k += 9) {
16791 for (uint32_t m = 1; m <= 1; m++) {
16792 GemmMicrokernelTester()
16793 .mr(1)
16794 .nr(4)
16795 .kr(2)
16796 .sr(1)
16797 .m(m)
16798 .n(n)
16799 .k(k)
16800 .iterations(1)
16801 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16802 }
16803 }
16804 }
16805 }
16806
16807 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4) {
16808 TEST_REQUIRES_X86_XOP;
16809 for (uint32_t n = 8; n <= 12; n += 4) {
16810 for (size_t k = 1; k <= 40; k += 9) {
16811 GemmMicrokernelTester()
16812 .mr(1)
16813 .nr(4)
16814 .kr(2)
16815 .sr(1)
16816 .m(1)
16817 .n(4)
16818 .k(k)
16819 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16820 }
16821 }
16822 }
16823
16824 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_strided_cn) {
16825 TEST_REQUIRES_X86_XOP;
16826 for (uint32_t n = 8; n <= 12; n += 4) {
16827 for (size_t k = 1; k <= 40; k += 9) {
16828 GemmMicrokernelTester()
16829 .mr(1)
16830 .nr(4)
16831 .kr(2)
16832 .sr(1)
16833 .m(1)
16834 .n(n)
16835 .k(k)
16836 .cn_stride(7)
16837 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16838 }
16839 }
16840 }
16841
16842 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_subtile) {
16843 TEST_REQUIRES_X86_XOP;
16844 for (uint32_t n = 8; n <= 12; n += 4) {
16845 for (size_t k = 1; k <= 40; k += 9) {
16846 for (uint32_t m = 1; m <= 1; m++) {
16847 GemmMicrokernelTester()
16848 .mr(1)
16849 .nr(4)
16850 .kr(2)
16851 .sr(1)
16852 .m(m)
16853 .n(n)
16854 .k(k)
16855 .iterations(1)
16856 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16857 }
16858 }
16859 }
16860 }
16861
16862 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, small_kernel) {
16863 TEST_REQUIRES_X86_XOP;
16864 for (size_t k = 1; k <= 40; k += 9) {
16865 GemmMicrokernelTester()
16866 .mr(1)
16867 .nr(4)
16868 .kr(2)
16869 .sr(1)
16870 .m(1)
16871 .n(4)
16872 .k(k)
16873 .ks(3)
16874 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16875 }
16876 }
16877
16878 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, small_kernel_subtile) {
16879 TEST_REQUIRES_X86_XOP;
16880 for (size_t k = 1; k <= 40; k += 9) {
16881 for (uint32_t m = 1; m <= 1; m++) {
16882 for (uint32_t n = 1; n <= 4; n++) {
16883 GemmMicrokernelTester()
16884 .mr(1)
16885 .nr(4)
16886 .kr(2)
16887 .sr(1)
16888 .m(m)
16889 .n(n)
16890 .k(k)
16891 .ks(3)
16892 .iterations(1)
16893 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16894 }
16895 }
16896 }
16897 }
16898
16899 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_small_kernel) {
16900 TEST_REQUIRES_X86_XOP;
16901 for (uint32_t n = 5; n < 8; n++) {
16902 for (size_t k = 1; k <= 40; k += 9) {
16903 GemmMicrokernelTester()
16904 .mr(1)
16905 .nr(4)
16906 .kr(2)
16907 .sr(1)
16908 .m(1)
16909 .n(4)
16910 .k(k)
16911 .ks(3)
16912 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16913 }
16914 }
16915 }
16916
16917 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_small_kernel) {
16918 TEST_REQUIRES_X86_XOP;
16919 for (uint32_t n = 8; n <= 12; n += 4) {
16920 for (size_t k = 1; k <= 40; k += 9) {
16921 GemmMicrokernelTester()
16922 .mr(1)
16923 .nr(4)
16924 .kr(2)
16925 .sr(1)
16926 .m(1)
16927 .n(4)
16928 .k(k)
16929 .ks(3)
16930 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16931 }
16932 }
16933 }
16934
16935 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cm_subtile) {
16936 TEST_REQUIRES_X86_XOP;
16937 for (size_t k = 1; k <= 40; k += 9) {
16938 for (uint32_t m = 1; m <= 1; m++) {
16939 for (uint32_t n = 1; n <= 4; n++) {
16940 GemmMicrokernelTester()
16941 .mr(1)
16942 .nr(4)
16943 .kr(2)
16944 .sr(1)
16945 .m(m)
16946 .n(n)
16947 .k(k)
16948 .cm_stride(7)
16949 .iterations(1)
16950 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16951 }
16952 }
16953 }
16954 }
16955
16956 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, a_offset) {
16957 TEST_REQUIRES_X86_XOP;
16958 for (size_t k = 1; k <= 40; k += 9) {
16959 GemmMicrokernelTester()
16960 .mr(1)
16961 .nr(4)
16962 .kr(2)
16963 .sr(1)
16964 .m(1)
16965 .n(4)
16966 .k(k)
16967 .ks(3)
16968 .a_offset(43)
16969 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16970 }
16971 }
16972
16973 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, zero) {
16974 TEST_REQUIRES_X86_XOP;
16975 for (uint32_t mz = 0; mz < 1; mz++) {
16976 for (size_t k = 1; k <= 40; k += 9) {
16977 GemmMicrokernelTester()
16978 .mr(1)
16979 .nr(4)
16980 .kr(2)
16981 .sr(1)
16982 .m(1)
16983 .n(4)
16984 .k(k)
16985 .ks(3)
16986 .a_offset(43)
16987 .zero_index(mz)
16988 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
16989 }
16990 }
16991 }
16992
16993 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, qmin) {
16994 TEST_REQUIRES_X86_XOP;
16995 GemmMicrokernelTester()
16996 .mr(1)
16997 .nr(4)
16998 .kr(2)
16999 .sr(1)
17000 .m(1)
17001 .n(4)
17002 .k(8)
17003 .qmin(128)
17004 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17005 }
17006
17007 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, qmax) {
17008 TEST_REQUIRES_X86_XOP;
17009 GemmMicrokernelTester()
17010 .mr(1)
17011 .nr(4)
17012 .kr(2)
17013 .sr(1)
17014 .m(1)
17015 .n(4)
17016 .k(8)
17017 .qmax(128)
17018 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17019 }
17020
17021 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cm) {
17022 TEST_REQUIRES_X86_XOP;
17023 GemmMicrokernelTester()
17024 .mr(1)
17025 .nr(4)
17026 .kr(2)
17027 .sr(1)
17028 .m(1)
17029 .n(4)
17030 .k(8)
17031 .cm_stride(7)
17032 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17033 }
17034
17035 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, no_a_zero_point) {
17036 TEST_REQUIRES_X86_XOP;
17037 for (size_t k = 1; k <= 40; k += 9) {
17038 GemmMicrokernelTester()
17039 .mr(1)
17040 .nr(4)
17041 .kr(2)
17042 .sr(1)
17043 .m(1)
17044 .n(4)
17045 .k(k)
17046 .a_zero_point(0)
17047 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17048 }
17049 }
17050
17051 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, no_b_zero_point) {
17052 TEST_REQUIRES_X86_XOP;
17053 for (size_t k = 1; k <= 40; k += 9) {
17054 GemmMicrokernelTester()
17055 .mr(1)
17056 .nr(4)
17057 .kr(2)
17058 .sr(1)
17059 .m(1)
17060 .n(4)
17061 .k(k)
17062 .b_zero_point(0)
17063 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17064 }
17065 }
17066
17067 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, no_zero_point) {
17068 TEST_REQUIRES_X86_XOP;
17069 for (size_t k = 1; k <= 40; k += 9) {
17070 GemmMicrokernelTester()
17071 .mr(1)
17072 .nr(4)
17073 .kr(2)
17074 .sr(1)
17075 .m(1)
17076 .n(4)
17077 .k(k)
17078 .a_zero_point(0)
17079 .b_zero_point(0)
17080 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17081 }
17082 }
17083#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17084
17085
17086#if XNN_ARCH_X86 || XNN_ARCH_X86_64
17087 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8) {
17088 TEST_REQUIRES_X86_XOP;
17089 GemmMicrokernelTester()
17090 .mr(2)
17091 .nr(4)
17092 .kr(2)
17093 .sr(1)
17094 .m(2)
17095 .n(4)
17096 .k(8)
17097 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17098 }
17099
17100 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cn) {
17101 TEST_REQUIRES_X86_XOP;
17102 GemmMicrokernelTester()
17103 .mr(2)
17104 .nr(4)
17105 .kr(2)
17106 .sr(1)
17107 .m(2)
17108 .n(4)
17109 .k(8)
17110 .cn_stride(7)
17111 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17112 }
17113
17114 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile) {
17115 TEST_REQUIRES_X86_XOP;
17116 for (uint32_t m = 1; m <= 2; m++) {
17117 for (uint32_t n = 1; n <= 4; n++) {
17118 GemmMicrokernelTester()
17119 .mr(2)
17120 .nr(4)
17121 .kr(2)
17122 .sr(1)
17123 .m(m)
17124 .n(n)
17125 .k(8)
17126 .iterations(1)
17127 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17128 }
17129 }
17130 }
17131
17132 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile_m) {
17133 TEST_REQUIRES_X86_XOP;
17134 for (uint32_t m = 1; m <= 2; m++) {
17135 GemmMicrokernelTester()
17136 .mr(2)
17137 .nr(4)
17138 .kr(2)
17139 .sr(1)
17140 .m(m)
17141 .n(4)
17142 .k(8)
17143 .iterations(1)
17144 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17145 }
17146 }
17147
17148 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile_n) {
17149 TEST_REQUIRES_X86_XOP;
17150 for (uint32_t n = 1; n <= 4; n++) {
17151 GemmMicrokernelTester()
17152 .mr(2)
17153 .nr(4)
17154 .kr(2)
17155 .sr(1)
17156 .m(2)
17157 .n(n)
17158 .k(8)
17159 .iterations(1)
17160 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17161 }
17162 }
17163
17164 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_lt_8) {
17165 TEST_REQUIRES_X86_XOP;
17166 for (size_t k = 1; k < 8; k++) {
17167 GemmMicrokernelTester()
17168 .mr(2)
17169 .nr(4)
17170 .kr(2)
17171 .sr(1)
17172 .m(2)
17173 .n(4)
17174 .k(k)
17175 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17176 }
17177 }
17178
17179 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_lt_8_subtile) {
17180 TEST_REQUIRES_X86_XOP;
17181 for (size_t k = 1; k < 8; k++) {
17182 for (uint32_t m = 1; m <= 2; m++) {
17183 for (uint32_t n = 1; n <= 4; n++) {
17184 GemmMicrokernelTester()
17185 .mr(2)
17186 .nr(4)
17187 .kr(2)
17188 .sr(1)
17189 .m(m)
17190 .n(n)
17191 .k(k)
17192 .iterations(1)
17193 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17194 }
17195 }
17196 }
17197 }
17198
17199 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_gt_8) {
17200 TEST_REQUIRES_X86_XOP;
17201 for (size_t k = 9; k < 16; k++) {
17202 GemmMicrokernelTester()
17203 .mr(2)
17204 .nr(4)
17205 .kr(2)
17206 .sr(1)
17207 .m(2)
17208 .n(4)
17209 .k(k)
17210 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17211 }
17212 }
17213
17214 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_gt_8_subtile) {
17215 TEST_REQUIRES_X86_XOP;
17216 for (size_t k = 9; k < 16; k++) {
17217 for (uint32_t m = 1; m <= 2; m++) {
17218 for (uint32_t n = 1; n <= 4; n++) {
17219 GemmMicrokernelTester()
17220 .mr(2)
17221 .nr(4)
17222 .kr(2)
17223 .sr(1)
17224 .m(m)
17225 .n(n)
17226 .k(k)
17227 .iterations(1)
17228 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17229 }
17230 }
17231 }
17232 }
17233
17234 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_div_8) {
17235 TEST_REQUIRES_X86_XOP;
17236 for (size_t k = 16; k <= 80; k += 8) {
17237 GemmMicrokernelTester()
17238 .mr(2)
17239 .nr(4)
17240 .kr(2)
17241 .sr(1)
17242 .m(2)
17243 .n(4)
17244 .k(k)
17245 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17246 }
17247 }
17248
17249 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_div_8_subtile) {
17250 TEST_REQUIRES_X86_XOP;
17251 for (size_t k = 16; k <= 80; k += 8) {
17252 for (uint32_t m = 1; m <= 2; m++) {
17253 for (uint32_t n = 1; n <= 4; n++) {
17254 GemmMicrokernelTester()
17255 .mr(2)
17256 .nr(4)
17257 .kr(2)
17258 .sr(1)
17259 .m(m)
17260 .n(n)
17261 .k(k)
17262 .iterations(1)
17263 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17264 }
17265 }
17266 }
17267 }
17268
17269 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4) {
17270 TEST_REQUIRES_X86_XOP;
17271 for (uint32_t n = 5; n < 8; n++) {
17272 for (size_t k = 1; k <= 40; k += 9) {
17273 GemmMicrokernelTester()
17274 .mr(2)
17275 .nr(4)
17276 .kr(2)
17277 .sr(1)
17278 .m(2)
17279 .n(4)
17280 .k(k)
17281 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17282 }
17283 }
17284 }
17285
17286 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_strided_cn) {
17287 TEST_REQUIRES_X86_XOP;
17288 for (uint32_t n = 5; n < 8; n++) {
17289 for (size_t k = 1; k <= 40; k += 9) {
17290 GemmMicrokernelTester()
17291 .mr(2)
17292 .nr(4)
17293 .kr(2)
17294 .sr(1)
17295 .m(2)
17296 .n(4)
17297 .k(k)
17298 .cn_stride(7)
17299 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17300 }
17301 }
17302 }
17303
17304 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_subtile) {
17305 TEST_REQUIRES_X86_XOP;
17306 for (uint32_t n = 5; n < 8; n++) {
17307 for (size_t k = 1; k <= 40; k += 9) {
17308 for (uint32_t m = 1; m <= 2; m++) {
17309 GemmMicrokernelTester()
17310 .mr(2)
17311 .nr(4)
17312 .kr(2)
17313 .sr(1)
17314 .m(m)
17315 .n(n)
17316 .k(k)
17317 .iterations(1)
17318 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17319 }
17320 }
17321 }
17322 }
17323
17324 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4) {
17325 TEST_REQUIRES_X86_XOP;
17326 for (uint32_t n = 8; n <= 12; n += 4) {
17327 for (size_t k = 1; k <= 40; k += 9) {
17328 GemmMicrokernelTester()
17329 .mr(2)
17330 .nr(4)
17331 .kr(2)
17332 .sr(1)
17333 .m(2)
17334 .n(4)
17335 .k(k)
17336 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17337 }
17338 }
17339 }
17340
17341 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_strided_cn) {
17342 TEST_REQUIRES_X86_XOP;
17343 for (uint32_t n = 8; n <= 12; n += 4) {
17344 for (size_t k = 1; k <= 40; k += 9) {
17345 GemmMicrokernelTester()
17346 .mr(2)
17347 .nr(4)
17348 .kr(2)
17349 .sr(1)
17350 .m(2)
17351 .n(n)
17352 .k(k)
17353 .cn_stride(7)
17354 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17355 }
17356 }
17357 }
17358
17359 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_subtile) {
17360 TEST_REQUIRES_X86_XOP;
17361 for (uint32_t n = 8; n <= 12; n += 4) {
17362 for (size_t k = 1; k <= 40; k += 9) {
17363 for (uint32_t m = 1; m <= 2; m++) {
17364 GemmMicrokernelTester()
17365 .mr(2)
17366 .nr(4)
17367 .kr(2)
17368 .sr(1)
17369 .m(m)
17370 .n(n)
17371 .k(k)
17372 .iterations(1)
17373 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17374 }
17375 }
17376 }
17377 }
17378
17379 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, small_kernel) {
17380 TEST_REQUIRES_X86_XOP;
17381 for (size_t k = 1; k <= 40; k += 9) {
17382 GemmMicrokernelTester()
17383 .mr(2)
17384 .nr(4)
17385 .kr(2)
17386 .sr(1)
17387 .m(2)
17388 .n(4)
17389 .k(k)
17390 .ks(3)
17391 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17392 }
17393 }
17394
17395 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, small_kernel_subtile) {
17396 TEST_REQUIRES_X86_XOP;
17397 for (size_t k = 1; k <= 40; k += 9) {
17398 for (uint32_t m = 1; m <= 2; m++) {
17399 for (uint32_t n = 1; n <= 4; n++) {
17400 GemmMicrokernelTester()
17401 .mr(2)
17402 .nr(4)
17403 .kr(2)
17404 .sr(1)
17405 .m(m)
17406 .n(n)
17407 .k(k)
17408 .ks(3)
17409 .iterations(1)
17410 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17411 }
17412 }
17413 }
17414 }
17415
17416 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_small_kernel) {
17417 TEST_REQUIRES_X86_XOP;
17418 for (uint32_t n = 5; n < 8; n++) {
17419 for (size_t k = 1; k <= 40; k += 9) {
17420 GemmMicrokernelTester()
17421 .mr(2)
17422 .nr(4)
17423 .kr(2)
17424 .sr(1)
17425 .m(2)
17426 .n(4)
17427 .k(k)
17428 .ks(3)
17429 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17430 }
17431 }
17432 }
17433
17434 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_small_kernel) {
17435 TEST_REQUIRES_X86_XOP;
17436 for (uint32_t n = 8; n <= 12; n += 4) {
17437 for (size_t k = 1; k <= 40; k += 9) {
17438 GemmMicrokernelTester()
17439 .mr(2)
17440 .nr(4)
17441 .kr(2)
17442 .sr(1)
17443 .m(2)
17444 .n(4)
17445 .k(k)
17446 .ks(3)
17447 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17448 }
17449 }
17450 }
17451
17452 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cm_subtile) {
17453 TEST_REQUIRES_X86_XOP;
17454 for (size_t k = 1; k <= 40; k += 9) {
17455 for (uint32_t m = 1; m <= 2; m++) {
17456 for (uint32_t n = 1; n <= 4; n++) {
17457 GemmMicrokernelTester()
17458 .mr(2)
17459 .nr(4)
17460 .kr(2)
17461 .sr(1)
17462 .m(m)
17463 .n(n)
17464 .k(k)
17465 .cm_stride(7)
17466 .iterations(1)
17467 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17468 }
17469 }
17470 }
17471 }
17472
17473 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, a_offset) {
17474 TEST_REQUIRES_X86_XOP;
17475 for (size_t k = 1; k <= 40; k += 9) {
17476 GemmMicrokernelTester()
17477 .mr(2)
17478 .nr(4)
17479 .kr(2)
17480 .sr(1)
17481 .m(2)
17482 .n(4)
17483 .k(k)
17484 .ks(3)
17485 .a_offset(83)
17486 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17487 }
17488 }
17489
17490 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, zero) {
17491 TEST_REQUIRES_X86_XOP;
17492 for (uint32_t mz = 0; mz < 2; mz++) {
17493 for (size_t k = 1; k <= 40; k += 9) {
17494 GemmMicrokernelTester()
17495 .mr(2)
17496 .nr(4)
17497 .kr(2)
17498 .sr(1)
17499 .m(2)
17500 .n(4)
17501 .k(k)
17502 .ks(3)
17503 .a_offset(83)
17504 .zero_index(mz)
17505 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17506 }
17507 }
17508 }
17509
17510 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, qmin) {
17511 TEST_REQUIRES_X86_XOP;
17512 GemmMicrokernelTester()
17513 .mr(2)
17514 .nr(4)
17515 .kr(2)
17516 .sr(1)
17517 .m(2)
17518 .n(4)
17519 .k(8)
17520 .qmin(128)
17521 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17522 }
17523
17524 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, qmax) {
17525 TEST_REQUIRES_X86_XOP;
17526 GemmMicrokernelTester()
17527 .mr(2)
17528 .nr(4)
17529 .kr(2)
17530 .sr(1)
17531 .m(2)
17532 .n(4)
17533 .k(8)
17534 .qmax(128)
17535 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17536 }
17537
17538 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cm) {
17539 TEST_REQUIRES_X86_XOP;
17540 GemmMicrokernelTester()
17541 .mr(2)
17542 .nr(4)
17543 .kr(2)
17544 .sr(1)
17545 .m(2)
17546 .n(4)
17547 .k(8)
17548 .cm_stride(7)
17549 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17550 }
17551
17552 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, no_a_zero_point) {
17553 TEST_REQUIRES_X86_XOP;
17554 for (size_t k = 1; k <= 40; k += 9) {
17555 GemmMicrokernelTester()
17556 .mr(2)
17557 .nr(4)
17558 .kr(2)
17559 .sr(1)
17560 .m(2)
17561 .n(4)
17562 .k(k)
17563 .a_zero_point(0)
17564 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17565 }
17566 }
17567
17568 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, no_b_zero_point) {
17569 TEST_REQUIRES_X86_XOP;
17570 for (size_t k = 1; k <= 40; k += 9) {
17571 GemmMicrokernelTester()
17572 .mr(2)
17573 .nr(4)
17574 .kr(2)
17575 .sr(1)
17576 .m(2)
17577 .n(4)
17578 .k(k)
17579 .b_zero_point(0)
17580 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17581 }
17582 }
17583
17584 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, no_zero_point) {
17585 TEST_REQUIRES_X86_XOP;
17586 for (size_t k = 1; k <= 40; k += 9) {
17587 GemmMicrokernelTester()
17588 .mr(2)
17589 .nr(4)
17590 .kr(2)
17591 .sr(1)
17592 .m(2)
17593 .n(4)
17594 .k(k)
17595 .a_zero_point(0)
17596 .b_zero_point(0)
17597 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17598 }
17599 }
17600#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17601
17602
17603#if XNN_ARCH_X86 || XNN_ARCH_X86_64
17604 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8) {
17605 TEST_REQUIRES_X86_XOP;
17606 GemmMicrokernelTester()
17607 .mr(3)
17608 .nr(4)
17609 .kr(2)
17610 .sr(1)
17611 .m(3)
17612 .n(4)
17613 .k(8)
17614 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17615 }
17616
17617 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, strided_cn) {
17618 TEST_REQUIRES_X86_XOP;
17619 GemmMicrokernelTester()
17620 .mr(3)
17621 .nr(4)
17622 .kr(2)
17623 .sr(1)
17624 .m(3)
17625 .n(4)
17626 .k(8)
17627 .cn_stride(7)
17628 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17629 }
17630
17631 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8_subtile) {
17632 TEST_REQUIRES_X86_XOP;
17633 for (uint32_t m = 1; m <= 3; m++) {
17634 for (uint32_t n = 1; n <= 4; n++) {
17635 GemmMicrokernelTester()
17636 .mr(3)
17637 .nr(4)
17638 .kr(2)
17639 .sr(1)
17640 .m(m)
17641 .n(n)
17642 .k(8)
17643 .iterations(1)
17644 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17645 }
17646 }
17647 }
17648
17649 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8_subtile_m) {
17650 TEST_REQUIRES_X86_XOP;
17651 for (uint32_t m = 1; m <= 3; m++) {
17652 GemmMicrokernelTester()
17653 .mr(3)
17654 .nr(4)
17655 .kr(2)
17656 .sr(1)
17657 .m(m)
17658 .n(4)
17659 .k(8)
17660 .iterations(1)
17661 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17662 }
17663 }
17664
17665 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8_subtile_n) {
17666 TEST_REQUIRES_X86_XOP;
17667 for (uint32_t n = 1; n <= 4; n++) {
17668 GemmMicrokernelTester()
17669 .mr(3)
17670 .nr(4)
17671 .kr(2)
17672 .sr(1)
17673 .m(3)
17674 .n(n)
17675 .k(8)
17676 .iterations(1)
17677 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17678 }
17679 }
17680
17681 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_lt_8) {
17682 TEST_REQUIRES_X86_XOP;
17683 for (size_t k = 1; k < 8; k++) {
17684 GemmMicrokernelTester()
17685 .mr(3)
17686 .nr(4)
17687 .kr(2)
17688 .sr(1)
17689 .m(3)
17690 .n(4)
17691 .k(k)
17692 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17693 }
17694 }
17695
17696 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_lt_8_subtile) {
17697 TEST_REQUIRES_X86_XOP;
17698 for (size_t k = 1; k < 8; k++) {
17699 for (uint32_t m = 1; m <= 3; m++) {
17700 for (uint32_t n = 1; n <= 4; n++) {
17701 GemmMicrokernelTester()
17702 .mr(3)
17703 .nr(4)
17704 .kr(2)
17705 .sr(1)
17706 .m(m)
17707 .n(n)
17708 .k(k)
17709 .iterations(1)
17710 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17711 }
17712 }
17713 }
17714 }
17715
17716 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_gt_8) {
17717 TEST_REQUIRES_X86_XOP;
17718 for (size_t k = 9; k < 16; k++) {
17719 GemmMicrokernelTester()
17720 .mr(3)
17721 .nr(4)
17722 .kr(2)
17723 .sr(1)
17724 .m(3)
17725 .n(4)
17726 .k(k)
17727 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17728 }
17729 }
17730
17731 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_gt_8_subtile) {
17732 TEST_REQUIRES_X86_XOP;
17733 for (size_t k = 9; k < 16; k++) {
17734 for (uint32_t m = 1; m <= 3; m++) {
17735 for (uint32_t n = 1; n <= 4; n++) {
17736 GemmMicrokernelTester()
17737 .mr(3)
17738 .nr(4)
17739 .kr(2)
17740 .sr(1)
17741 .m(m)
17742 .n(n)
17743 .k(k)
17744 .iterations(1)
17745 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17746 }
17747 }
17748 }
17749 }
17750
17751 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_div_8) {
17752 TEST_REQUIRES_X86_XOP;
17753 for (size_t k = 16; k <= 80; k += 8) {
17754 GemmMicrokernelTester()
17755 .mr(3)
17756 .nr(4)
17757 .kr(2)
17758 .sr(1)
17759 .m(3)
17760 .n(4)
17761 .k(k)
17762 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17763 }
17764 }
17765
17766 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_div_8_subtile) {
17767 TEST_REQUIRES_X86_XOP;
17768 for (size_t k = 16; k <= 80; k += 8) {
17769 for (uint32_t m = 1; m <= 3; m++) {
17770 for (uint32_t n = 1; n <= 4; n++) {
17771 GemmMicrokernelTester()
17772 .mr(3)
17773 .nr(4)
17774 .kr(2)
17775 .sr(1)
17776 .m(m)
17777 .n(n)
17778 .k(k)
17779 .iterations(1)
17780 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17781 }
17782 }
17783 }
17784 }
17785
17786 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4) {
17787 TEST_REQUIRES_X86_XOP;
17788 for (uint32_t n = 5; n < 8; n++) {
17789 for (size_t k = 1; k <= 40; k += 9) {
17790 GemmMicrokernelTester()
17791 .mr(3)
17792 .nr(4)
17793 .kr(2)
17794 .sr(1)
17795 .m(3)
17796 .n(4)
17797 .k(k)
17798 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17799 }
17800 }
17801 }
17802
17803 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4_strided_cn) {
17804 TEST_REQUIRES_X86_XOP;
17805 for (uint32_t n = 5; n < 8; n++) {
17806 for (size_t k = 1; k <= 40; k += 9) {
17807 GemmMicrokernelTester()
17808 .mr(3)
17809 .nr(4)
17810 .kr(2)
17811 .sr(1)
17812 .m(3)
17813 .n(4)
17814 .k(k)
17815 .cn_stride(7)
17816 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17817 }
17818 }
17819 }
17820
17821 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4_subtile) {
17822 TEST_REQUIRES_X86_XOP;
17823 for (uint32_t n = 5; n < 8; n++) {
17824 for (size_t k = 1; k <= 40; k += 9) {
17825 for (uint32_t m = 1; m <= 3; m++) {
17826 GemmMicrokernelTester()
17827 .mr(3)
17828 .nr(4)
17829 .kr(2)
17830 .sr(1)
17831 .m(m)
17832 .n(n)
17833 .k(k)
17834 .iterations(1)
17835 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17836 }
17837 }
17838 }
17839 }
17840
17841 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4) {
17842 TEST_REQUIRES_X86_XOP;
17843 for (uint32_t n = 8; n <= 12; n += 4) {
17844 for (size_t k = 1; k <= 40; k += 9) {
17845 GemmMicrokernelTester()
17846 .mr(3)
17847 .nr(4)
17848 .kr(2)
17849 .sr(1)
17850 .m(3)
17851 .n(4)
17852 .k(k)
17853 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17854 }
17855 }
17856 }
17857
17858 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4_strided_cn) {
17859 TEST_REQUIRES_X86_XOP;
17860 for (uint32_t n = 8; n <= 12; n += 4) {
17861 for (size_t k = 1; k <= 40; k += 9) {
17862 GemmMicrokernelTester()
17863 .mr(3)
17864 .nr(4)
17865 .kr(2)
17866 .sr(1)
17867 .m(3)
17868 .n(n)
17869 .k(k)
17870 .cn_stride(7)
17871 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17872 }
17873 }
17874 }
17875
17876 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4_subtile) {
17877 TEST_REQUIRES_X86_XOP;
17878 for (uint32_t n = 8; n <= 12; n += 4) {
17879 for (size_t k = 1; k <= 40; k += 9) {
17880 for (uint32_t m = 1; m <= 3; m++) {
17881 GemmMicrokernelTester()
17882 .mr(3)
17883 .nr(4)
17884 .kr(2)
17885 .sr(1)
17886 .m(m)
17887 .n(n)
17888 .k(k)
17889 .iterations(1)
17890 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17891 }
17892 }
17893 }
17894 }
17895
17896 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, small_kernel) {
17897 TEST_REQUIRES_X86_XOP;
17898 for (size_t k = 1; k <= 40; k += 9) {
17899 GemmMicrokernelTester()
17900 .mr(3)
17901 .nr(4)
17902 .kr(2)
17903 .sr(1)
17904 .m(3)
17905 .n(4)
17906 .k(k)
17907 .ks(3)
17908 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17909 }
17910 }
17911
17912 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, small_kernel_subtile) {
17913 TEST_REQUIRES_X86_XOP;
17914 for (size_t k = 1; k <= 40; k += 9) {
17915 for (uint32_t m = 1; m <= 3; m++) {
17916 for (uint32_t n = 1; n <= 4; n++) {
17917 GemmMicrokernelTester()
17918 .mr(3)
17919 .nr(4)
17920 .kr(2)
17921 .sr(1)
17922 .m(m)
17923 .n(n)
17924 .k(k)
17925 .ks(3)
17926 .iterations(1)
17927 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17928 }
17929 }
17930 }
17931 }
17932
17933 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4_small_kernel) {
17934 TEST_REQUIRES_X86_XOP;
17935 for (uint32_t n = 5; n < 8; n++) {
17936 for (size_t k = 1; k <= 40; k += 9) {
17937 GemmMicrokernelTester()
17938 .mr(3)
17939 .nr(4)
17940 .kr(2)
17941 .sr(1)
17942 .m(3)
17943 .n(4)
17944 .k(k)
17945 .ks(3)
17946 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17947 }
17948 }
17949 }
17950
17951 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4_small_kernel) {
17952 TEST_REQUIRES_X86_XOP;
17953 for (uint32_t n = 8; n <= 12; n += 4) {
17954 for (size_t k = 1; k <= 40; k += 9) {
17955 GemmMicrokernelTester()
17956 .mr(3)
17957 .nr(4)
17958 .kr(2)
17959 .sr(1)
17960 .m(3)
17961 .n(4)
17962 .k(k)
17963 .ks(3)
17964 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17965 }
17966 }
17967 }
17968
17969 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, strided_cm_subtile) {
17970 TEST_REQUIRES_X86_XOP;
17971 for (size_t k = 1; k <= 40; k += 9) {
17972 for (uint32_t m = 1; m <= 3; m++) {
17973 for (uint32_t n = 1; n <= 4; n++) {
17974 GemmMicrokernelTester()
17975 .mr(3)
17976 .nr(4)
17977 .kr(2)
17978 .sr(1)
17979 .m(m)
17980 .n(n)
17981 .k(k)
17982 .cm_stride(7)
17983 .iterations(1)
17984 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
17985 }
17986 }
17987 }
17988 }
17989
17990 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, a_offset) {
17991 TEST_REQUIRES_X86_XOP;
17992 for (size_t k = 1; k <= 40; k += 9) {
17993 GemmMicrokernelTester()
17994 .mr(3)
17995 .nr(4)
17996 .kr(2)
17997 .sr(1)
17998 .m(3)
17999 .n(4)
18000 .k(k)
18001 .ks(3)
18002 .a_offset(127)
18003 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18004 }
18005 }
18006
18007 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, zero) {
18008 TEST_REQUIRES_X86_XOP;
18009 for (uint32_t mz = 0; mz < 3; mz++) {
18010 for (size_t k = 1; k <= 40; k += 9) {
18011 GemmMicrokernelTester()
18012 .mr(3)
18013 .nr(4)
18014 .kr(2)
18015 .sr(1)
18016 .m(3)
18017 .n(4)
18018 .k(k)
18019 .ks(3)
18020 .a_offset(127)
18021 .zero_index(mz)
18022 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18023 }
18024 }
18025 }
18026
18027 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, qmin) {
18028 TEST_REQUIRES_X86_XOP;
18029 GemmMicrokernelTester()
18030 .mr(3)
18031 .nr(4)
18032 .kr(2)
18033 .sr(1)
18034 .m(3)
18035 .n(4)
18036 .k(8)
18037 .qmin(128)
18038 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18039 }
18040
18041 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, qmax) {
18042 TEST_REQUIRES_X86_XOP;
18043 GemmMicrokernelTester()
18044 .mr(3)
18045 .nr(4)
18046 .kr(2)
18047 .sr(1)
18048 .m(3)
18049 .n(4)
18050 .k(8)
18051 .qmax(128)
18052 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18053 }
18054
18055 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, strided_cm) {
18056 TEST_REQUIRES_X86_XOP;
18057 GemmMicrokernelTester()
18058 .mr(3)
18059 .nr(4)
18060 .kr(2)
18061 .sr(1)
18062 .m(3)
18063 .n(4)
18064 .k(8)
18065 .cm_stride(7)
18066 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18067 }
18068
18069 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, no_a_zero_point) {
18070 TEST_REQUIRES_X86_XOP;
18071 for (size_t k = 1; k <= 40; k += 9) {
18072 GemmMicrokernelTester()
18073 .mr(3)
18074 .nr(4)
18075 .kr(2)
18076 .sr(1)
18077 .m(3)
18078 .n(4)
18079 .k(k)
18080 .a_zero_point(0)
18081 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18082 }
18083 }
18084
18085 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, no_b_zero_point) {
18086 TEST_REQUIRES_X86_XOP;
18087 for (size_t k = 1; k <= 40; k += 9) {
18088 GemmMicrokernelTester()
18089 .mr(3)
18090 .nr(4)
18091 .kr(2)
18092 .sr(1)
18093 .m(3)
18094 .n(4)
18095 .k(k)
18096 .b_zero_point(0)
18097 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18098 }
18099 }
18100
18101 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, no_zero_point) {
18102 TEST_REQUIRES_X86_XOP;
18103 for (size_t k = 1; k <= 40; k += 9) {
18104 GemmMicrokernelTester()
18105 .mr(3)
18106 .nr(4)
18107 .kr(2)
18108 .sr(1)
18109 .m(3)
18110 .n(4)
18111 .k(k)
18112 .a_zero_point(0)
18113 .b_zero_point(0)
18114 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18115 }
18116 }
18117#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18118
18119
18120#if XNN_ARCH_X86 || XNN_ARCH_X86_64
18121 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8) {
18122 TEST_REQUIRES_X86_XOP;
18123 GemmMicrokernelTester()
18124 .mr(4)
18125 .nr(4)
18126 .kr(2)
18127 .sr(1)
18128 .m(4)
18129 .n(4)
18130 .k(8)
18131 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18132 }
18133
18134 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cn) {
18135 TEST_REQUIRES_X86_XOP;
18136 GemmMicrokernelTester()
18137 .mr(4)
18138 .nr(4)
18139 .kr(2)
18140 .sr(1)
18141 .m(4)
18142 .n(4)
18143 .k(8)
18144 .cn_stride(7)
18145 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18146 }
18147
18148 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile) {
18149 TEST_REQUIRES_X86_XOP;
18150 for (uint32_t m = 1; m <= 4; m++) {
18151 for (uint32_t n = 1; n <= 4; n++) {
18152 GemmMicrokernelTester()
18153 .mr(4)
18154 .nr(4)
18155 .kr(2)
18156 .sr(1)
18157 .m(m)
18158 .n(n)
18159 .k(8)
18160 .iterations(1)
18161 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18162 }
18163 }
18164 }
18165
18166 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile_m) {
18167 TEST_REQUIRES_X86_XOP;
18168 for (uint32_t m = 1; m <= 4; m++) {
18169 GemmMicrokernelTester()
18170 .mr(4)
18171 .nr(4)
18172 .kr(2)
18173 .sr(1)
18174 .m(m)
18175 .n(4)
18176 .k(8)
18177 .iterations(1)
18178 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18179 }
18180 }
18181
18182 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile_n) {
18183 TEST_REQUIRES_X86_XOP;
18184 for (uint32_t n = 1; n <= 4; n++) {
18185 GemmMicrokernelTester()
18186 .mr(4)
18187 .nr(4)
18188 .kr(2)
18189 .sr(1)
18190 .m(4)
18191 .n(n)
18192 .k(8)
18193 .iterations(1)
18194 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18195 }
18196 }
18197
18198 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_lt_8) {
18199 TEST_REQUIRES_X86_XOP;
18200 for (size_t k = 1; k < 8; k++) {
18201 GemmMicrokernelTester()
18202 .mr(4)
18203 .nr(4)
18204 .kr(2)
18205 .sr(1)
18206 .m(4)
18207 .n(4)
18208 .k(k)
18209 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18210 }
18211 }
18212
18213 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_lt_8_subtile) {
18214 TEST_REQUIRES_X86_XOP;
18215 for (size_t k = 1; k < 8; k++) {
18216 for (uint32_t m = 1; m <= 4; m++) {
18217 for (uint32_t n = 1; n <= 4; n++) {
18218 GemmMicrokernelTester()
18219 .mr(4)
18220 .nr(4)
18221 .kr(2)
18222 .sr(1)
18223 .m(m)
18224 .n(n)
18225 .k(k)
18226 .iterations(1)
18227 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18228 }
18229 }
18230 }
18231 }
18232
18233 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_gt_8) {
18234 TEST_REQUIRES_X86_XOP;
18235 for (size_t k = 9; k < 16; k++) {
18236 GemmMicrokernelTester()
18237 .mr(4)
18238 .nr(4)
18239 .kr(2)
18240 .sr(1)
18241 .m(4)
18242 .n(4)
18243 .k(k)
18244 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18245 }
18246 }
18247
18248 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_gt_8_subtile) {
18249 TEST_REQUIRES_X86_XOP;
18250 for (size_t k = 9; k < 16; k++) {
18251 for (uint32_t m = 1; m <= 4; m++) {
18252 for (uint32_t n = 1; n <= 4; n++) {
18253 GemmMicrokernelTester()
18254 .mr(4)
18255 .nr(4)
18256 .kr(2)
18257 .sr(1)
18258 .m(m)
18259 .n(n)
18260 .k(k)
18261 .iterations(1)
18262 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18263 }
18264 }
18265 }
18266 }
18267
18268 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_div_8) {
18269 TEST_REQUIRES_X86_XOP;
18270 for (size_t k = 16; k <= 80; k += 8) {
18271 GemmMicrokernelTester()
18272 .mr(4)
18273 .nr(4)
18274 .kr(2)
18275 .sr(1)
18276 .m(4)
18277 .n(4)
18278 .k(k)
18279 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18280 }
18281 }
18282
18283 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_div_8_subtile) {
18284 TEST_REQUIRES_X86_XOP;
18285 for (size_t k = 16; k <= 80; k += 8) {
18286 for (uint32_t m = 1; m <= 4; m++) {
18287 for (uint32_t n = 1; n <= 4; n++) {
18288 GemmMicrokernelTester()
18289 .mr(4)
18290 .nr(4)
18291 .kr(2)
18292 .sr(1)
18293 .m(m)
18294 .n(n)
18295 .k(k)
18296 .iterations(1)
18297 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18298 }
18299 }
18300 }
18301 }
18302
18303 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4) {
18304 TEST_REQUIRES_X86_XOP;
18305 for (uint32_t n = 5; n < 8; n++) {
18306 for (size_t k = 1; k <= 40; k += 9) {
18307 GemmMicrokernelTester()
18308 .mr(4)
18309 .nr(4)
18310 .kr(2)
18311 .sr(1)
18312 .m(4)
18313 .n(4)
18314 .k(k)
18315 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18316 }
18317 }
18318 }
18319
18320 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_strided_cn) {
18321 TEST_REQUIRES_X86_XOP;
18322 for (uint32_t n = 5; n < 8; n++) {
18323 for (size_t k = 1; k <= 40; k += 9) {
18324 GemmMicrokernelTester()
18325 .mr(4)
18326 .nr(4)
18327 .kr(2)
18328 .sr(1)
18329 .m(4)
18330 .n(4)
18331 .k(k)
18332 .cn_stride(7)
18333 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18334 }
18335 }
18336 }
18337
18338 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_subtile) {
18339 TEST_REQUIRES_X86_XOP;
18340 for (uint32_t n = 5; n < 8; n++) {
18341 for (size_t k = 1; k <= 40; k += 9) {
18342 for (uint32_t m = 1; m <= 4; m++) {
18343 GemmMicrokernelTester()
18344 .mr(4)
18345 .nr(4)
18346 .kr(2)
18347 .sr(1)
18348 .m(m)
18349 .n(n)
18350 .k(k)
18351 .iterations(1)
18352 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18353 }
18354 }
18355 }
18356 }
18357
18358 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4) {
18359 TEST_REQUIRES_X86_XOP;
18360 for (uint32_t n = 8; n <= 12; n += 4) {
18361 for (size_t k = 1; k <= 40; k += 9) {
18362 GemmMicrokernelTester()
18363 .mr(4)
18364 .nr(4)
18365 .kr(2)
18366 .sr(1)
18367 .m(4)
18368 .n(4)
18369 .k(k)
18370 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18371 }
18372 }
18373 }
18374
18375 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_strided_cn) {
18376 TEST_REQUIRES_X86_XOP;
18377 for (uint32_t n = 8; n <= 12; n += 4) {
18378 for (size_t k = 1; k <= 40; k += 9) {
18379 GemmMicrokernelTester()
18380 .mr(4)
18381 .nr(4)
18382 .kr(2)
18383 .sr(1)
18384 .m(4)
18385 .n(n)
18386 .k(k)
18387 .cn_stride(7)
18388 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18389 }
18390 }
18391 }
18392
18393 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_subtile) {
18394 TEST_REQUIRES_X86_XOP;
18395 for (uint32_t n = 8; n <= 12; n += 4) {
18396 for (size_t k = 1; k <= 40; k += 9) {
18397 for (uint32_t m = 1; m <= 4; m++) {
18398 GemmMicrokernelTester()
18399 .mr(4)
18400 .nr(4)
18401 .kr(2)
18402 .sr(1)
18403 .m(m)
18404 .n(n)
18405 .k(k)
18406 .iterations(1)
18407 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18408 }
18409 }
18410 }
18411 }
18412
18413 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, small_kernel) {
18414 TEST_REQUIRES_X86_XOP;
18415 for (size_t k = 1; k <= 40; k += 9) {
18416 GemmMicrokernelTester()
18417 .mr(4)
18418 .nr(4)
18419 .kr(2)
18420 .sr(1)
18421 .m(4)
18422 .n(4)
18423 .k(k)
18424 .ks(3)
18425 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18426 }
18427 }
18428
18429 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, small_kernel_subtile) {
18430 TEST_REQUIRES_X86_XOP;
18431 for (size_t k = 1; k <= 40; k += 9) {
18432 for (uint32_t m = 1; m <= 4; m++) {
18433 for (uint32_t n = 1; n <= 4; n++) {
18434 GemmMicrokernelTester()
18435 .mr(4)
18436 .nr(4)
18437 .kr(2)
18438 .sr(1)
18439 .m(m)
18440 .n(n)
18441 .k(k)
18442 .ks(3)
18443 .iterations(1)
18444 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18445 }
18446 }
18447 }
18448 }
18449
18450 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_small_kernel) {
18451 TEST_REQUIRES_X86_XOP;
18452 for (uint32_t n = 5; n < 8; n++) {
18453 for (size_t k = 1; k <= 40; k += 9) {
18454 GemmMicrokernelTester()
18455 .mr(4)
18456 .nr(4)
18457 .kr(2)
18458 .sr(1)
18459 .m(4)
18460 .n(4)
18461 .k(k)
18462 .ks(3)
18463 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18464 }
18465 }
18466 }
18467
18468 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_small_kernel) {
18469 TEST_REQUIRES_X86_XOP;
18470 for (uint32_t n = 8; n <= 12; n += 4) {
18471 for (size_t k = 1; k <= 40; k += 9) {
18472 GemmMicrokernelTester()
18473 .mr(4)
18474 .nr(4)
18475 .kr(2)
18476 .sr(1)
18477 .m(4)
18478 .n(4)
18479 .k(k)
18480 .ks(3)
18481 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18482 }
18483 }
18484 }
18485
18486 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cm_subtile) {
18487 TEST_REQUIRES_X86_XOP;
18488 for (size_t k = 1; k <= 40; k += 9) {
18489 for (uint32_t m = 1; m <= 4; m++) {
18490 for (uint32_t n = 1; n <= 4; n++) {
18491 GemmMicrokernelTester()
18492 .mr(4)
18493 .nr(4)
18494 .kr(2)
18495 .sr(1)
18496 .m(m)
18497 .n(n)
18498 .k(k)
18499 .cm_stride(7)
18500 .iterations(1)
18501 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18502 }
18503 }
18504 }
18505 }
18506
18507 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, a_offset) {
18508 TEST_REQUIRES_X86_XOP;
18509 for (size_t k = 1; k <= 40; k += 9) {
18510 GemmMicrokernelTester()
18511 .mr(4)
18512 .nr(4)
18513 .kr(2)
18514 .sr(1)
18515 .m(4)
18516 .n(4)
18517 .k(k)
18518 .ks(3)
18519 .a_offset(163)
18520 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18521 }
18522 }
18523
18524 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, zero) {
18525 TEST_REQUIRES_X86_XOP;
18526 for (uint32_t mz = 0; mz < 4; mz++) {
18527 for (size_t k = 1; k <= 40; k += 9) {
18528 GemmMicrokernelTester()
18529 .mr(4)
18530 .nr(4)
18531 .kr(2)
18532 .sr(1)
18533 .m(4)
18534 .n(4)
18535 .k(k)
18536 .ks(3)
18537 .a_offset(163)
18538 .zero_index(mz)
18539 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18540 }
18541 }
18542 }
18543
18544 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, qmin) {
18545 TEST_REQUIRES_X86_XOP;
18546 GemmMicrokernelTester()
18547 .mr(4)
18548 .nr(4)
18549 .kr(2)
18550 .sr(1)
18551 .m(4)
18552 .n(4)
18553 .k(8)
18554 .qmin(128)
18555 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18556 }
18557
18558 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, qmax) {
18559 TEST_REQUIRES_X86_XOP;
18560 GemmMicrokernelTester()
18561 .mr(4)
18562 .nr(4)
18563 .kr(2)
18564 .sr(1)
18565 .m(4)
18566 .n(4)
18567 .k(8)
18568 .qmax(128)
18569 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18570 }
18571
18572 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cm) {
18573 TEST_REQUIRES_X86_XOP;
18574 GemmMicrokernelTester()
18575 .mr(4)
18576 .nr(4)
18577 .kr(2)
18578 .sr(1)
18579 .m(4)
18580 .n(4)
18581 .k(8)
18582 .cm_stride(7)
18583 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18584 }
18585
18586 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, no_a_zero_point) {
18587 TEST_REQUIRES_X86_XOP;
18588 for (size_t k = 1; k <= 40; k += 9) {
18589 GemmMicrokernelTester()
18590 .mr(4)
18591 .nr(4)
18592 .kr(2)
18593 .sr(1)
18594 .m(4)
18595 .n(4)
18596 .k(k)
18597 .a_zero_point(0)
18598 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18599 }
18600 }
18601
18602 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, no_b_zero_point) {
18603 TEST_REQUIRES_X86_XOP;
18604 for (size_t k = 1; k <= 40; k += 9) {
18605 GemmMicrokernelTester()
18606 .mr(4)
18607 .nr(4)
18608 .kr(2)
18609 .sr(1)
18610 .m(4)
18611 .n(4)
18612 .k(k)
18613 .b_zero_point(0)
18614 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18615 }
18616 }
18617
18618 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, no_zero_point) {
18619 TEST_REQUIRES_X86_XOP;
18620 for (size_t k = 1; k <= 40; k += 9) {
18621 GemmMicrokernelTester()
18622 .mr(4)
18623 .nr(4)
18624 .kr(2)
18625 .sr(1)
18626 .m(4)
18627 .n(4)
18628 .k(k)
18629 .a_zero_point(0)
18630 .b_zero_point(0)
18631 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18632 }
18633 }
18634#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18635
18636
18637#if XNN_ARCH_X86 || XNN_ARCH_X86_64
18638 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8) {
18639 TEST_REQUIRES_X86_SSE2;
18640 GemmMicrokernelTester()
18641 .mr(1)
18642 .nr(4)
18643 .kr(8)
18644 .sr(1)
18645 .m(1)
18646 .n(4)
18647 .k(8)
18648 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18649 }
18650
18651 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cn) {
18652 TEST_REQUIRES_X86_SSE2;
18653 GemmMicrokernelTester()
18654 .mr(1)
18655 .nr(4)
18656 .kr(8)
18657 .sr(1)
18658 .m(1)
18659 .n(4)
18660 .k(8)
18661 .cn_stride(7)
18662 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18663 }
18664
18665 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile) {
18666 TEST_REQUIRES_X86_SSE2;
18667 for (uint32_t m = 1; m <= 1; m++) {
18668 for (uint32_t n = 1; n <= 4; n++) {
18669 GemmMicrokernelTester()
18670 .mr(1)
18671 .nr(4)
18672 .kr(8)
18673 .sr(1)
18674 .m(m)
18675 .n(n)
18676 .k(8)
18677 .iterations(1)
18678 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18679 }
18680 }
18681 }
18682
18683 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile_m) {
18684 TEST_REQUIRES_X86_SSE2;
18685 for (uint32_t m = 1; m <= 1; m++) {
18686 GemmMicrokernelTester()
18687 .mr(1)
18688 .nr(4)
18689 .kr(8)
18690 .sr(1)
18691 .m(m)
18692 .n(4)
18693 .k(8)
18694 .iterations(1)
18695 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18696 }
18697 }
18698
18699 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile_n) {
18700 TEST_REQUIRES_X86_SSE2;
18701 for (uint32_t n = 1; n <= 4; n++) {
18702 GemmMicrokernelTester()
18703 .mr(1)
18704 .nr(4)
18705 .kr(8)
18706 .sr(1)
18707 .m(1)
18708 .n(n)
18709 .k(8)
18710 .iterations(1)
18711 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18712 }
18713 }
18714
18715 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_lt_8) {
18716 TEST_REQUIRES_X86_SSE2;
18717 for (size_t k = 1; k < 8; k++) {
18718 GemmMicrokernelTester()
18719 .mr(1)
18720 .nr(4)
18721 .kr(8)
18722 .sr(1)
18723 .m(1)
18724 .n(4)
18725 .k(k)
18726 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18727 }
18728 }
18729
18730 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_lt_8_subtile) {
18731 TEST_REQUIRES_X86_SSE2;
18732 for (size_t k = 1; k < 8; k++) {
18733 for (uint32_t m = 1; m <= 1; m++) {
18734 for (uint32_t n = 1; n <= 4; n++) {
18735 GemmMicrokernelTester()
18736 .mr(1)
18737 .nr(4)
18738 .kr(8)
18739 .sr(1)
18740 .m(m)
18741 .n(n)
18742 .k(k)
18743 .iterations(1)
18744 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18745 }
18746 }
18747 }
18748 }
18749
18750 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_gt_8) {
18751 TEST_REQUIRES_X86_SSE2;
18752 for (size_t k = 9; k < 16; k++) {
18753 GemmMicrokernelTester()
18754 .mr(1)
18755 .nr(4)
18756 .kr(8)
18757 .sr(1)
18758 .m(1)
18759 .n(4)
18760 .k(k)
18761 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18762 }
18763 }
18764
18765 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_gt_8_subtile) {
18766 TEST_REQUIRES_X86_SSE2;
18767 for (size_t k = 9; k < 16; k++) {
18768 for (uint32_t m = 1; m <= 1; m++) {
18769 for (uint32_t n = 1; n <= 4; n++) {
18770 GemmMicrokernelTester()
18771 .mr(1)
18772 .nr(4)
18773 .kr(8)
18774 .sr(1)
18775 .m(m)
18776 .n(n)
18777 .k(k)
18778 .iterations(1)
18779 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18780 }
18781 }
18782 }
18783 }
18784
18785 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_div_8) {
18786 TEST_REQUIRES_X86_SSE2;
18787 for (size_t k = 16; k <= 80; k += 8) {
18788 GemmMicrokernelTester()
18789 .mr(1)
18790 .nr(4)
18791 .kr(8)
18792 .sr(1)
18793 .m(1)
18794 .n(4)
18795 .k(k)
18796 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18797 }
18798 }
18799
18800 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_div_8_subtile) {
18801 TEST_REQUIRES_X86_SSE2;
18802 for (size_t k = 16; k <= 80; k += 8) {
18803 for (uint32_t m = 1; m <= 1; m++) {
18804 for (uint32_t n = 1; n <= 4; n++) {
18805 GemmMicrokernelTester()
18806 .mr(1)
18807 .nr(4)
18808 .kr(8)
18809 .sr(1)
18810 .m(m)
18811 .n(n)
18812 .k(k)
18813 .iterations(1)
18814 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18815 }
18816 }
18817 }
18818 }
18819
18820 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4) {
18821 TEST_REQUIRES_X86_SSE2;
18822 for (uint32_t n = 5; n < 8; n++) {
18823 for (size_t k = 1; k <= 40; k += 9) {
18824 GemmMicrokernelTester()
18825 .mr(1)
18826 .nr(4)
18827 .kr(8)
18828 .sr(1)
18829 .m(1)
18830 .n(4)
18831 .k(k)
18832 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18833 }
18834 }
18835 }
18836
18837 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_strided_cn) {
18838 TEST_REQUIRES_X86_SSE2;
18839 for (uint32_t n = 5; n < 8; n++) {
18840 for (size_t k = 1; k <= 40; k += 9) {
18841 GemmMicrokernelTester()
18842 .mr(1)
18843 .nr(4)
18844 .kr(8)
18845 .sr(1)
18846 .m(1)
18847 .n(4)
18848 .k(k)
18849 .cn_stride(7)
18850 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18851 }
18852 }
18853 }
18854
18855 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_subtile) {
18856 TEST_REQUIRES_X86_SSE2;
18857 for (uint32_t n = 5; n < 8; n++) {
18858 for (size_t k = 1; k <= 40; k += 9) {
18859 for (uint32_t m = 1; m <= 1; m++) {
18860 GemmMicrokernelTester()
18861 .mr(1)
18862 .nr(4)
18863 .kr(8)
18864 .sr(1)
18865 .m(m)
18866 .n(n)
18867 .k(k)
18868 .iterations(1)
18869 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18870 }
18871 }
18872 }
18873 }
18874
18875 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4) {
18876 TEST_REQUIRES_X86_SSE2;
18877 for (uint32_t n = 8; n <= 12; n += 4) {
18878 for (size_t k = 1; k <= 40; k += 9) {
18879 GemmMicrokernelTester()
18880 .mr(1)
18881 .nr(4)
18882 .kr(8)
18883 .sr(1)
18884 .m(1)
18885 .n(4)
18886 .k(k)
18887 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18888 }
18889 }
18890 }
18891
18892 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_strided_cn) {
18893 TEST_REQUIRES_X86_SSE2;
18894 for (uint32_t n = 8; n <= 12; n += 4) {
18895 for (size_t k = 1; k <= 40; k += 9) {
18896 GemmMicrokernelTester()
18897 .mr(1)
18898 .nr(4)
18899 .kr(8)
18900 .sr(1)
18901 .m(1)
18902 .n(n)
18903 .k(k)
18904 .cn_stride(7)
18905 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18906 }
18907 }
18908 }
18909
18910 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_subtile) {
18911 TEST_REQUIRES_X86_SSE2;
18912 for (uint32_t n = 8; n <= 12; n += 4) {
18913 for (size_t k = 1; k <= 40; k += 9) {
18914 for (uint32_t m = 1; m <= 1; m++) {
18915 GemmMicrokernelTester()
18916 .mr(1)
18917 .nr(4)
18918 .kr(8)
18919 .sr(1)
18920 .m(m)
18921 .n(n)
18922 .k(k)
18923 .iterations(1)
18924 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18925 }
18926 }
18927 }
18928 }
18929
18930 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, small_kernel) {
18931 TEST_REQUIRES_X86_SSE2;
18932 for (size_t k = 1; k <= 40; k += 9) {
18933 GemmMicrokernelTester()
18934 .mr(1)
18935 .nr(4)
18936 .kr(8)
18937 .sr(1)
18938 .m(1)
18939 .n(4)
18940 .k(k)
18941 .ks(3)
18942 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18943 }
18944 }
18945
18946 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, small_kernel_subtile) {
18947 TEST_REQUIRES_X86_SSE2;
18948 for (size_t k = 1; k <= 40; k += 9) {
18949 for (uint32_t m = 1; m <= 1; m++) {
18950 for (uint32_t n = 1; n <= 4; n++) {
18951 GemmMicrokernelTester()
18952 .mr(1)
18953 .nr(4)
18954 .kr(8)
18955 .sr(1)
18956 .m(m)
18957 .n(n)
18958 .k(k)
18959 .ks(3)
18960 .iterations(1)
18961 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18962 }
18963 }
18964 }
18965 }
18966
18967 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_small_kernel) {
18968 TEST_REQUIRES_X86_SSE2;
18969 for (uint32_t n = 5; n < 8; n++) {
18970 for (size_t k = 1; k <= 40; k += 9) {
18971 GemmMicrokernelTester()
18972 .mr(1)
18973 .nr(4)
18974 .kr(8)
18975 .sr(1)
18976 .m(1)
18977 .n(4)
18978 .k(k)
18979 .ks(3)
18980 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18981 }
18982 }
18983 }
18984
18985 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_small_kernel) {
18986 TEST_REQUIRES_X86_SSE2;
18987 for (uint32_t n = 8; n <= 12; n += 4) {
18988 for (size_t k = 1; k <= 40; k += 9) {
18989 GemmMicrokernelTester()
18990 .mr(1)
18991 .nr(4)
18992 .kr(8)
18993 .sr(1)
18994 .m(1)
18995 .n(4)
18996 .k(k)
18997 .ks(3)
18998 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
18999 }
19000 }
19001 }
19002
19003 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cm_subtile) {
19004 TEST_REQUIRES_X86_SSE2;
19005 for (size_t k = 1; k <= 40; k += 9) {
19006 for (uint32_t m = 1; m <= 1; m++) {
19007 for (uint32_t n = 1; n <= 4; n++) {
19008 GemmMicrokernelTester()
19009 .mr(1)
19010 .nr(4)
19011 .kr(8)
19012 .sr(1)
19013 .m(m)
19014 .n(n)
19015 .k(k)
19016 .cm_stride(7)
19017 .iterations(1)
19018 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19019 }
19020 }
19021 }
19022 }
19023
19024 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, a_offset) {
19025 TEST_REQUIRES_X86_SSE2;
19026 for (size_t k = 1; k <= 40; k += 9) {
19027 GemmMicrokernelTester()
19028 .mr(1)
19029 .nr(4)
19030 .kr(8)
19031 .sr(1)
19032 .m(1)
19033 .n(4)
19034 .k(k)
19035 .ks(3)
19036 .a_offset(43)
19037 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19038 }
19039 }
19040
19041 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, zero) {
19042 TEST_REQUIRES_X86_SSE2;
19043 for (uint32_t mz = 0; mz < 1; mz++) {
19044 for (size_t k = 1; k <= 40; k += 9) {
19045 GemmMicrokernelTester()
19046 .mr(1)
19047 .nr(4)
19048 .kr(8)
19049 .sr(1)
19050 .m(1)
19051 .n(4)
19052 .k(k)
19053 .ks(3)
19054 .a_offset(43)
19055 .zero_index(mz)
19056 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19057 }
19058 }
19059 }
19060
19061 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, qmin) {
19062 TEST_REQUIRES_X86_SSE2;
19063 GemmMicrokernelTester()
19064 .mr(1)
19065 .nr(4)
19066 .kr(8)
19067 .sr(1)
19068 .m(1)
19069 .n(4)
19070 .k(8)
19071 .qmin(128)
19072 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19073 }
19074
19075 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, qmax) {
19076 TEST_REQUIRES_X86_SSE2;
19077 GemmMicrokernelTester()
19078 .mr(1)
19079 .nr(4)
19080 .kr(8)
19081 .sr(1)
19082 .m(1)
19083 .n(4)
19084 .k(8)
19085 .qmax(128)
19086 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19087 }
19088
19089 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cm) {
19090 TEST_REQUIRES_X86_SSE2;
19091 GemmMicrokernelTester()
19092 .mr(1)
19093 .nr(4)
19094 .kr(8)
19095 .sr(1)
19096 .m(1)
19097 .n(4)
19098 .k(8)
19099 .cm_stride(7)
19100 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19101 }
19102
19103 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, no_a_zero_point) {
19104 TEST_REQUIRES_X86_SSE2;
19105 for (size_t k = 1; k <= 40; k += 9) {
19106 GemmMicrokernelTester()
19107 .mr(1)
19108 .nr(4)
19109 .kr(8)
19110 .sr(1)
19111 .m(1)
19112 .n(4)
19113 .k(k)
19114 .a_zero_point(0)
19115 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19116 }
19117 }
19118
19119 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, no_b_zero_point) {
19120 TEST_REQUIRES_X86_SSE2;
19121 for (size_t k = 1; k <= 40; k += 9) {
19122 GemmMicrokernelTester()
19123 .mr(1)
19124 .nr(4)
19125 .kr(8)
19126 .sr(1)
19127 .m(1)
19128 .n(4)
19129 .k(k)
19130 .b_zero_point(0)
19131 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19132 }
19133 }
19134
19135 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, no_zero_point) {
19136 TEST_REQUIRES_X86_SSE2;
19137 for (size_t k = 1; k <= 40; k += 9) {
19138 GemmMicrokernelTester()
19139 .mr(1)
19140 .nr(4)
19141 .kr(8)
19142 .sr(1)
19143 .m(1)
19144 .n(4)
19145 .k(k)
19146 .a_zero_point(0)
19147 .b_zero_point(0)
19148 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19149 }
19150 }
19151#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19152
19153
19154#if XNN_ARCH_X86 || XNN_ARCH_X86_64
19155 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8) {
19156 TEST_REQUIRES_X86_SSE2;
19157 GemmMicrokernelTester()
19158 .mr(2)
19159 .nr(4)
19160 .kr(8)
19161 .sr(1)
19162 .m(2)
19163 .n(4)
19164 .k(8)
19165 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19166 }
19167
19168 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cn) {
19169 TEST_REQUIRES_X86_SSE2;
19170 GemmMicrokernelTester()
19171 .mr(2)
19172 .nr(4)
19173 .kr(8)
19174 .sr(1)
19175 .m(2)
19176 .n(4)
19177 .k(8)
19178 .cn_stride(7)
19179 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19180 }
19181
19182 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile) {
19183 TEST_REQUIRES_X86_SSE2;
19184 for (uint32_t m = 1; m <= 2; m++) {
19185 for (uint32_t n = 1; n <= 4; n++) {
19186 GemmMicrokernelTester()
19187 .mr(2)
19188 .nr(4)
19189 .kr(8)
19190 .sr(1)
19191 .m(m)
19192 .n(n)
19193 .k(8)
19194 .iterations(1)
19195 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19196 }
19197 }
19198 }
19199
19200 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile_m) {
19201 TEST_REQUIRES_X86_SSE2;
19202 for (uint32_t m = 1; m <= 2; m++) {
19203 GemmMicrokernelTester()
19204 .mr(2)
19205 .nr(4)
19206 .kr(8)
19207 .sr(1)
19208 .m(m)
19209 .n(4)
19210 .k(8)
19211 .iterations(1)
19212 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19213 }
19214 }
19215
19216 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile_n) {
19217 TEST_REQUIRES_X86_SSE2;
19218 for (uint32_t n = 1; n <= 4; n++) {
19219 GemmMicrokernelTester()
19220 .mr(2)
19221 .nr(4)
19222 .kr(8)
19223 .sr(1)
19224 .m(2)
19225 .n(n)
19226 .k(8)
19227 .iterations(1)
19228 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19229 }
19230 }
19231
19232 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_lt_8) {
19233 TEST_REQUIRES_X86_SSE2;
19234 for (size_t k = 1; k < 8; k++) {
19235 GemmMicrokernelTester()
19236 .mr(2)
19237 .nr(4)
19238 .kr(8)
19239 .sr(1)
19240 .m(2)
19241 .n(4)
19242 .k(k)
19243 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19244 }
19245 }
19246
19247 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_lt_8_subtile) {
19248 TEST_REQUIRES_X86_SSE2;
19249 for (size_t k = 1; k < 8; k++) {
19250 for (uint32_t m = 1; m <= 2; m++) {
19251 for (uint32_t n = 1; n <= 4; n++) {
19252 GemmMicrokernelTester()
19253 .mr(2)
19254 .nr(4)
19255 .kr(8)
19256 .sr(1)
19257 .m(m)
19258 .n(n)
19259 .k(k)
19260 .iterations(1)
19261 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19262 }
19263 }
19264 }
19265 }
19266
19267 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_gt_8) {
19268 TEST_REQUIRES_X86_SSE2;
19269 for (size_t k = 9; k < 16; k++) {
19270 GemmMicrokernelTester()
19271 .mr(2)
19272 .nr(4)
19273 .kr(8)
19274 .sr(1)
19275 .m(2)
19276 .n(4)
19277 .k(k)
19278 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19279 }
19280 }
19281
19282 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_gt_8_subtile) {
19283 TEST_REQUIRES_X86_SSE2;
19284 for (size_t k = 9; k < 16; k++) {
19285 for (uint32_t m = 1; m <= 2; m++) {
19286 for (uint32_t n = 1; n <= 4; n++) {
19287 GemmMicrokernelTester()
19288 .mr(2)
19289 .nr(4)
19290 .kr(8)
19291 .sr(1)
19292 .m(m)
19293 .n(n)
19294 .k(k)
19295 .iterations(1)
19296 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19297 }
19298 }
19299 }
19300 }
19301
19302 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_div_8) {
19303 TEST_REQUIRES_X86_SSE2;
19304 for (size_t k = 16; k <= 80; k += 8) {
19305 GemmMicrokernelTester()
19306 .mr(2)
19307 .nr(4)
19308 .kr(8)
19309 .sr(1)
19310 .m(2)
19311 .n(4)
19312 .k(k)
19313 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19314 }
19315 }
19316
19317 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_div_8_subtile) {
19318 TEST_REQUIRES_X86_SSE2;
19319 for (size_t k = 16; k <= 80; k += 8) {
19320 for (uint32_t m = 1; m <= 2; m++) {
19321 for (uint32_t n = 1; n <= 4; n++) {
19322 GemmMicrokernelTester()
19323 .mr(2)
19324 .nr(4)
19325 .kr(8)
19326 .sr(1)
19327 .m(m)
19328 .n(n)
19329 .k(k)
19330 .iterations(1)
19331 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19332 }
19333 }
19334 }
19335 }
19336
19337 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4) {
19338 TEST_REQUIRES_X86_SSE2;
19339 for (uint32_t n = 5; n < 8; n++) {
19340 for (size_t k = 1; k <= 40; k += 9) {
19341 GemmMicrokernelTester()
19342 .mr(2)
19343 .nr(4)
19344 .kr(8)
19345 .sr(1)
19346 .m(2)
19347 .n(4)
19348 .k(k)
19349 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19350 }
19351 }
19352 }
19353
19354 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_strided_cn) {
19355 TEST_REQUIRES_X86_SSE2;
19356 for (uint32_t n = 5; n < 8; n++) {
19357 for (size_t k = 1; k <= 40; k += 9) {
19358 GemmMicrokernelTester()
19359 .mr(2)
19360 .nr(4)
19361 .kr(8)
19362 .sr(1)
19363 .m(2)
19364 .n(4)
19365 .k(k)
19366 .cn_stride(7)
19367 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19368 }
19369 }
19370 }
19371
19372 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_subtile) {
19373 TEST_REQUIRES_X86_SSE2;
19374 for (uint32_t n = 5; n < 8; n++) {
19375 for (size_t k = 1; k <= 40; k += 9) {
19376 for (uint32_t m = 1; m <= 2; m++) {
19377 GemmMicrokernelTester()
19378 .mr(2)
19379 .nr(4)
19380 .kr(8)
19381 .sr(1)
19382 .m(m)
19383 .n(n)
19384 .k(k)
19385 .iterations(1)
19386 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19387 }
19388 }
19389 }
19390 }
19391
19392 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4) {
19393 TEST_REQUIRES_X86_SSE2;
19394 for (uint32_t n = 8; n <= 12; n += 4) {
19395 for (size_t k = 1; k <= 40; k += 9) {
19396 GemmMicrokernelTester()
19397 .mr(2)
19398 .nr(4)
19399 .kr(8)
19400 .sr(1)
19401 .m(2)
19402 .n(4)
19403 .k(k)
19404 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19405 }
19406 }
19407 }
19408
19409 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_strided_cn) {
19410 TEST_REQUIRES_X86_SSE2;
19411 for (uint32_t n = 8; n <= 12; n += 4) {
19412 for (size_t k = 1; k <= 40; k += 9) {
19413 GemmMicrokernelTester()
19414 .mr(2)
19415 .nr(4)
19416 .kr(8)
19417 .sr(1)
19418 .m(2)
19419 .n(n)
19420 .k(k)
19421 .cn_stride(7)
19422 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19423 }
19424 }
19425 }
19426
19427 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_subtile) {
19428 TEST_REQUIRES_X86_SSE2;
19429 for (uint32_t n = 8; n <= 12; n += 4) {
19430 for (size_t k = 1; k <= 40; k += 9) {
19431 for (uint32_t m = 1; m <= 2; m++) {
19432 GemmMicrokernelTester()
19433 .mr(2)
19434 .nr(4)
19435 .kr(8)
19436 .sr(1)
19437 .m(m)
19438 .n(n)
19439 .k(k)
19440 .iterations(1)
19441 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19442 }
19443 }
19444 }
19445 }
19446
19447 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, small_kernel) {
19448 TEST_REQUIRES_X86_SSE2;
19449 for (size_t k = 1; k <= 40; k += 9) {
19450 GemmMicrokernelTester()
19451 .mr(2)
19452 .nr(4)
19453 .kr(8)
19454 .sr(1)
19455 .m(2)
19456 .n(4)
19457 .k(k)
19458 .ks(3)
19459 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19460 }
19461 }
19462
19463 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, small_kernel_subtile) {
19464 TEST_REQUIRES_X86_SSE2;
19465 for (size_t k = 1; k <= 40; k += 9) {
19466 for (uint32_t m = 1; m <= 2; m++) {
19467 for (uint32_t n = 1; n <= 4; n++) {
19468 GemmMicrokernelTester()
19469 .mr(2)
19470 .nr(4)
19471 .kr(8)
19472 .sr(1)
19473 .m(m)
19474 .n(n)
19475 .k(k)
19476 .ks(3)
19477 .iterations(1)
19478 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19479 }
19480 }
19481 }
19482 }
19483
19484 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_small_kernel) {
19485 TEST_REQUIRES_X86_SSE2;
19486 for (uint32_t n = 5; n < 8; n++) {
19487 for (size_t k = 1; k <= 40; k += 9) {
19488 GemmMicrokernelTester()
19489 .mr(2)
19490 .nr(4)
19491 .kr(8)
19492 .sr(1)
19493 .m(2)
19494 .n(4)
19495 .k(k)
19496 .ks(3)
19497 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19498 }
19499 }
19500 }
19501
19502 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_small_kernel) {
19503 TEST_REQUIRES_X86_SSE2;
19504 for (uint32_t n = 8; n <= 12; n += 4) {
19505 for (size_t k = 1; k <= 40; k += 9) {
19506 GemmMicrokernelTester()
19507 .mr(2)
19508 .nr(4)
19509 .kr(8)
19510 .sr(1)
19511 .m(2)
19512 .n(4)
19513 .k(k)
19514 .ks(3)
19515 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19516 }
19517 }
19518 }
19519
19520 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cm_subtile) {
19521 TEST_REQUIRES_X86_SSE2;
19522 for (size_t k = 1; k <= 40; k += 9) {
19523 for (uint32_t m = 1; m <= 2; m++) {
19524 for (uint32_t n = 1; n <= 4; n++) {
19525 GemmMicrokernelTester()
19526 .mr(2)
19527 .nr(4)
19528 .kr(8)
19529 .sr(1)
19530 .m(m)
19531 .n(n)
19532 .k(k)
19533 .cm_stride(7)
19534 .iterations(1)
19535 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19536 }
19537 }
19538 }
19539 }
19540
19541 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, a_offset) {
19542 TEST_REQUIRES_X86_SSE2;
19543 for (size_t k = 1; k <= 40; k += 9) {
19544 GemmMicrokernelTester()
19545 .mr(2)
19546 .nr(4)
19547 .kr(8)
19548 .sr(1)
19549 .m(2)
19550 .n(4)
19551 .k(k)
19552 .ks(3)
19553 .a_offset(83)
19554 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19555 }
19556 }
19557
19558 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, zero) {
19559 TEST_REQUIRES_X86_SSE2;
19560 for (uint32_t mz = 0; mz < 2; mz++) {
19561 for (size_t k = 1; k <= 40; k += 9) {
19562 GemmMicrokernelTester()
19563 .mr(2)
19564 .nr(4)
19565 .kr(8)
19566 .sr(1)
19567 .m(2)
19568 .n(4)
19569 .k(k)
19570 .ks(3)
19571 .a_offset(83)
19572 .zero_index(mz)
19573 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19574 }
19575 }
19576 }
19577
19578 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, qmin) {
19579 TEST_REQUIRES_X86_SSE2;
19580 GemmMicrokernelTester()
19581 .mr(2)
19582 .nr(4)
19583 .kr(8)
19584 .sr(1)
19585 .m(2)
19586 .n(4)
19587 .k(8)
19588 .qmin(128)
19589 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19590 }
19591
19592 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, qmax) {
19593 TEST_REQUIRES_X86_SSE2;
19594 GemmMicrokernelTester()
19595 .mr(2)
19596 .nr(4)
19597 .kr(8)
19598 .sr(1)
19599 .m(2)
19600 .n(4)
19601 .k(8)
19602 .qmax(128)
19603 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19604 }
19605
19606 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cm) {
19607 TEST_REQUIRES_X86_SSE2;
19608 GemmMicrokernelTester()
19609 .mr(2)
19610 .nr(4)
19611 .kr(8)
19612 .sr(1)
19613 .m(2)
19614 .n(4)
19615 .k(8)
19616 .cm_stride(7)
19617 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19618 }
19619
19620 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, no_a_zero_point) {
19621 TEST_REQUIRES_X86_SSE2;
19622 for (size_t k = 1; k <= 40; k += 9) {
19623 GemmMicrokernelTester()
19624 .mr(2)
19625 .nr(4)
19626 .kr(8)
19627 .sr(1)
19628 .m(2)
19629 .n(4)
19630 .k(k)
19631 .a_zero_point(0)
19632 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19633 }
19634 }
19635
19636 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, no_b_zero_point) {
19637 TEST_REQUIRES_X86_SSE2;
19638 for (size_t k = 1; k <= 40; k += 9) {
19639 GemmMicrokernelTester()
19640 .mr(2)
19641 .nr(4)
19642 .kr(8)
19643 .sr(1)
19644 .m(2)
19645 .n(4)
19646 .k(k)
19647 .b_zero_point(0)
19648 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19649 }
19650 }
19651
19652 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, no_zero_point) {
19653 TEST_REQUIRES_X86_SSE2;
19654 for (size_t k = 1; k <= 40; k += 9) {
19655 GemmMicrokernelTester()
19656 .mr(2)
19657 .nr(4)
19658 .kr(8)
19659 .sr(1)
19660 .m(2)
19661 .n(4)
19662 .k(k)
19663 .a_zero_point(0)
19664 .b_zero_point(0)
19665 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19666 }
19667 }
19668#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19669
19670
19671#if XNN_ARCH_X86 || XNN_ARCH_X86_64
19672 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8) {
19673 TEST_REQUIRES_X86_SSE2;
19674 GemmMicrokernelTester()
19675 .mr(3)
19676 .nr(4)
19677 .kr(8)
19678 .sr(1)
19679 .m(3)
19680 .n(4)
19681 .k(8)
19682 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19683 }
19684
19685 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, strided_cn) {
19686 TEST_REQUIRES_X86_SSE2;
19687 GemmMicrokernelTester()
19688 .mr(3)
19689 .nr(4)
19690 .kr(8)
19691 .sr(1)
19692 .m(3)
19693 .n(4)
19694 .k(8)
19695 .cn_stride(7)
19696 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19697 }
19698
19699 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8_subtile) {
19700 TEST_REQUIRES_X86_SSE2;
19701 for (uint32_t m = 1; m <= 3; m++) {
19702 for (uint32_t n = 1; n <= 4; n++) {
19703 GemmMicrokernelTester()
19704 .mr(3)
19705 .nr(4)
19706 .kr(8)
19707 .sr(1)
19708 .m(m)
19709 .n(n)
19710 .k(8)
19711 .iterations(1)
19712 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19713 }
19714 }
19715 }
19716
19717 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8_subtile_m) {
19718 TEST_REQUIRES_X86_SSE2;
19719 for (uint32_t m = 1; m <= 3; m++) {
19720 GemmMicrokernelTester()
19721 .mr(3)
19722 .nr(4)
19723 .kr(8)
19724 .sr(1)
19725 .m(m)
19726 .n(4)
19727 .k(8)
19728 .iterations(1)
19729 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19730 }
19731 }
19732
19733 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8_subtile_n) {
19734 TEST_REQUIRES_X86_SSE2;
19735 for (uint32_t n = 1; n <= 4; n++) {
19736 GemmMicrokernelTester()
19737 .mr(3)
19738 .nr(4)
19739 .kr(8)
19740 .sr(1)
19741 .m(3)
19742 .n(n)
19743 .k(8)
19744 .iterations(1)
19745 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19746 }
19747 }
19748
19749 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_lt_8) {
19750 TEST_REQUIRES_X86_SSE2;
19751 for (size_t k = 1; k < 8; k++) {
19752 GemmMicrokernelTester()
19753 .mr(3)
19754 .nr(4)
19755 .kr(8)
19756 .sr(1)
19757 .m(3)
19758 .n(4)
19759 .k(k)
19760 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19761 }
19762 }
19763
19764 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_lt_8_subtile) {
19765 TEST_REQUIRES_X86_SSE2;
19766 for (size_t k = 1; k < 8; k++) {
19767 for (uint32_t m = 1; m <= 3; m++) {
19768 for (uint32_t n = 1; n <= 4; n++) {
19769 GemmMicrokernelTester()
19770 .mr(3)
19771 .nr(4)
19772 .kr(8)
19773 .sr(1)
19774 .m(m)
19775 .n(n)
19776 .k(k)
19777 .iterations(1)
19778 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19779 }
19780 }
19781 }
19782 }
19783
19784 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_gt_8) {
19785 TEST_REQUIRES_X86_SSE2;
19786 for (size_t k = 9; k < 16; k++) {
19787 GemmMicrokernelTester()
19788 .mr(3)
19789 .nr(4)
19790 .kr(8)
19791 .sr(1)
19792 .m(3)
19793 .n(4)
19794 .k(k)
19795 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19796 }
19797 }
19798
19799 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_gt_8_subtile) {
19800 TEST_REQUIRES_X86_SSE2;
19801 for (size_t k = 9; k < 16; k++) {
19802 for (uint32_t m = 1; m <= 3; m++) {
19803 for (uint32_t n = 1; n <= 4; n++) {
19804 GemmMicrokernelTester()
19805 .mr(3)
19806 .nr(4)
19807 .kr(8)
19808 .sr(1)
19809 .m(m)
19810 .n(n)
19811 .k(k)
19812 .iterations(1)
19813 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19814 }
19815 }
19816 }
19817 }
19818
19819 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_div_8) {
19820 TEST_REQUIRES_X86_SSE2;
19821 for (size_t k = 16; k <= 80; k += 8) {
19822 GemmMicrokernelTester()
19823 .mr(3)
19824 .nr(4)
19825 .kr(8)
19826 .sr(1)
19827 .m(3)
19828 .n(4)
19829 .k(k)
19830 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19831 }
19832 }
19833
19834 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_div_8_subtile) {
19835 TEST_REQUIRES_X86_SSE2;
19836 for (size_t k = 16; k <= 80; k += 8) {
19837 for (uint32_t m = 1; m <= 3; m++) {
19838 for (uint32_t n = 1; n <= 4; n++) {
19839 GemmMicrokernelTester()
19840 .mr(3)
19841 .nr(4)
19842 .kr(8)
19843 .sr(1)
19844 .m(m)
19845 .n(n)
19846 .k(k)
19847 .iterations(1)
19848 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19849 }
19850 }
19851 }
19852 }
19853
19854 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4) {
19855 TEST_REQUIRES_X86_SSE2;
19856 for (uint32_t n = 5; n < 8; n++) {
19857 for (size_t k = 1; k <= 40; k += 9) {
19858 GemmMicrokernelTester()
19859 .mr(3)
19860 .nr(4)
19861 .kr(8)
19862 .sr(1)
19863 .m(3)
19864 .n(4)
19865 .k(k)
19866 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19867 }
19868 }
19869 }
19870
19871 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4_strided_cn) {
19872 TEST_REQUIRES_X86_SSE2;
19873 for (uint32_t n = 5; n < 8; n++) {
19874 for (size_t k = 1; k <= 40; k += 9) {
19875 GemmMicrokernelTester()
19876 .mr(3)
19877 .nr(4)
19878 .kr(8)
19879 .sr(1)
19880 .m(3)
19881 .n(4)
19882 .k(k)
19883 .cn_stride(7)
19884 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19885 }
19886 }
19887 }
19888
19889 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4_subtile) {
19890 TEST_REQUIRES_X86_SSE2;
19891 for (uint32_t n = 5; n < 8; n++) {
19892 for (size_t k = 1; k <= 40; k += 9) {
19893 for (uint32_t m = 1; m <= 3; m++) {
19894 GemmMicrokernelTester()
19895 .mr(3)
19896 .nr(4)
19897 .kr(8)
19898 .sr(1)
19899 .m(m)
19900 .n(n)
19901 .k(k)
19902 .iterations(1)
19903 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19904 }
19905 }
19906 }
19907 }
19908
19909 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4) {
19910 TEST_REQUIRES_X86_SSE2;
19911 for (uint32_t n = 8; n <= 12; n += 4) {
19912 for (size_t k = 1; k <= 40; k += 9) {
19913 GemmMicrokernelTester()
19914 .mr(3)
19915 .nr(4)
19916 .kr(8)
19917 .sr(1)
19918 .m(3)
19919 .n(4)
19920 .k(k)
19921 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19922 }
19923 }
19924 }
19925
19926 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4_strided_cn) {
19927 TEST_REQUIRES_X86_SSE2;
19928 for (uint32_t n = 8; n <= 12; n += 4) {
19929 for (size_t k = 1; k <= 40; k += 9) {
19930 GemmMicrokernelTester()
19931 .mr(3)
19932 .nr(4)
19933 .kr(8)
19934 .sr(1)
19935 .m(3)
19936 .n(n)
19937 .k(k)
19938 .cn_stride(7)
19939 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19940 }
19941 }
19942 }
19943
19944 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4_subtile) {
19945 TEST_REQUIRES_X86_SSE2;
19946 for (uint32_t n = 8; n <= 12; n += 4) {
19947 for (size_t k = 1; k <= 40; k += 9) {
19948 for (uint32_t m = 1; m <= 3; m++) {
19949 GemmMicrokernelTester()
19950 .mr(3)
19951 .nr(4)
19952 .kr(8)
19953 .sr(1)
19954 .m(m)
19955 .n(n)
19956 .k(k)
19957 .iterations(1)
19958 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19959 }
19960 }
19961 }
19962 }
19963
19964 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, small_kernel) {
19965 TEST_REQUIRES_X86_SSE2;
19966 for (size_t k = 1; k <= 40; k += 9) {
19967 GemmMicrokernelTester()
19968 .mr(3)
19969 .nr(4)
19970 .kr(8)
19971 .sr(1)
19972 .m(3)
19973 .n(4)
19974 .k(k)
19975 .ks(3)
19976 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19977 }
19978 }
19979
19980 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, small_kernel_subtile) {
19981 TEST_REQUIRES_X86_SSE2;
19982 for (size_t k = 1; k <= 40; k += 9) {
19983 for (uint32_t m = 1; m <= 3; m++) {
19984 for (uint32_t n = 1; n <= 4; n++) {
19985 GemmMicrokernelTester()
19986 .mr(3)
19987 .nr(4)
19988 .kr(8)
19989 .sr(1)
19990 .m(m)
19991 .n(n)
19992 .k(k)
19993 .ks(3)
19994 .iterations(1)
19995 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
19996 }
19997 }
19998 }
19999 }
20000
20001 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4_small_kernel) {
20002 TEST_REQUIRES_X86_SSE2;
20003 for (uint32_t n = 5; n < 8; n++) {
20004 for (size_t k = 1; k <= 40; k += 9) {
20005 GemmMicrokernelTester()
20006 .mr(3)
20007 .nr(4)
20008 .kr(8)
20009 .sr(1)
20010 .m(3)
20011 .n(4)
20012 .k(k)
20013 .ks(3)
20014 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20015 }
20016 }
20017 }
20018
20019 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4_small_kernel) {
20020 TEST_REQUIRES_X86_SSE2;
20021 for (uint32_t n = 8; n <= 12; n += 4) {
20022 for (size_t k = 1; k <= 40; k += 9) {
20023 GemmMicrokernelTester()
20024 .mr(3)
20025 .nr(4)
20026 .kr(8)
20027 .sr(1)
20028 .m(3)
20029 .n(4)
20030 .k(k)
20031 .ks(3)
20032 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20033 }
20034 }
20035 }
20036
20037 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, strided_cm_subtile) {
20038 TEST_REQUIRES_X86_SSE2;
20039 for (size_t k = 1; k <= 40; k += 9) {
20040 for (uint32_t m = 1; m <= 3; m++) {
20041 for (uint32_t n = 1; n <= 4; n++) {
20042 GemmMicrokernelTester()
20043 .mr(3)
20044 .nr(4)
20045 .kr(8)
20046 .sr(1)
20047 .m(m)
20048 .n(n)
20049 .k(k)
20050 .cm_stride(7)
20051 .iterations(1)
20052 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20053 }
20054 }
20055 }
20056 }
20057
20058 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, a_offset) {
20059 TEST_REQUIRES_X86_SSE2;
20060 for (size_t k = 1; k <= 40; k += 9) {
20061 GemmMicrokernelTester()
20062 .mr(3)
20063 .nr(4)
20064 .kr(8)
20065 .sr(1)
20066 .m(3)
20067 .n(4)
20068 .k(k)
20069 .ks(3)
20070 .a_offset(127)
20071 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20072 }
20073 }
20074
20075 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, zero) {
20076 TEST_REQUIRES_X86_SSE2;
20077 for (uint32_t mz = 0; mz < 3; mz++) {
20078 for (size_t k = 1; k <= 40; k += 9) {
20079 GemmMicrokernelTester()
20080 .mr(3)
20081 .nr(4)
20082 .kr(8)
20083 .sr(1)
20084 .m(3)
20085 .n(4)
20086 .k(k)
20087 .ks(3)
20088 .a_offset(127)
20089 .zero_index(mz)
20090 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20091 }
20092 }
20093 }
20094
20095 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, qmin) {
20096 TEST_REQUIRES_X86_SSE2;
20097 GemmMicrokernelTester()
20098 .mr(3)
20099 .nr(4)
20100 .kr(8)
20101 .sr(1)
20102 .m(3)
20103 .n(4)
20104 .k(8)
20105 .qmin(128)
20106 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20107 }
20108
20109 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, qmax) {
20110 TEST_REQUIRES_X86_SSE2;
20111 GemmMicrokernelTester()
20112 .mr(3)
20113 .nr(4)
20114 .kr(8)
20115 .sr(1)
20116 .m(3)
20117 .n(4)
20118 .k(8)
20119 .qmax(128)
20120 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20121 }
20122
20123 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, strided_cm) {
20124 TEST_REQUIRES_X86_SSE2;
20125 GemmMicrokernelTester()
20126 .mr(3)
20127 .nr(4)
20128 .kr(8)
20129 .sr(1)
20130 .m(3)
20131 .n(4)
20132 .k(8)
20133 .cm_stride(7)
20134 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20135 }
20136
20137 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, no_a_zero_point) {
20138 TEST_REQUIRES_X86_SSE2;
20139 for (size_t k = 1; k <= 40; k += 9) {
20140 GemmMicrokernelTester()
20141 .mr(3)
20142 .nr(4)
20143 .kr(8)
20144 .sr(1)
20145 .m(3)
20146 .n(4)
20147 .k(k)
20148 .a_zero_point(0)
20149 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20150 }
20151 }
20152
20153 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, no_b_zero_point) {
20154 TEST_REQUIRES_X86_SSE2;
20155 for (size_t k = 1; k <= 40; k += 9) {
20156 GemmMicrokernelTester()
20157 .mr(3)
20158 .nr(4)
20159 .kr(8)
20160 .sr(1)
20161 .m(3)
20162 .n(4)
20163 .k(k)
20164 .b_zero_point(0)
20165 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20166 }
20167 }
20168
20169 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, no_zero_point) {
20170 TEST_REQUIRES_X86_SSE2;
20171 for (size_t k = 1; k <= 40; k += 9) {
20172 GemmMicrokernelTester()
20173 .mr(3)
20174 .nr(4)
20175 .kr(8)
20176 .sr(1)
20177 .m(3)
20178 .n(4)
20179 .k(k)
20180 .a_zero_point(0)
20181 .b_zero_point(0)
20182 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20183 }
20184 }
20185#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20186
20187
20188#if XNN_ARCH_X86 || XNN_ARCH_X86_64
20189 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8) {
20190 TEST_REQUIRES_X86_SSE41;
20191 GemmMicrokernelTester()
20192 .mr(1)
20193 .nr(4)
20194 .kr(8)
20195 .sr(1)
20196 .m(1)
20197 .n(4)
20198 .k(8)
20199 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20200 }
20201
20202 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, strided_cn) {
20203 TEST_REQUIRES_X86_SSE41;
20204 GemmMicrokernelTester()
20205 .mr(1)
20206 .nr(4)
20207 .kr(8)
20208 .sr(1)
20209 .m(1)
20210 .n(4)
20211 .k(8)
20212 .cn_stride(7)
20213 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20214 }
20215
20216 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8_subtile) {
20217 TEST_REQUIRES_X86_SSE41;
20218 for (uint32_t m = 1; m <= 1; m++) {
20219 for (uint32_t n = 1; n <= 4; n++) {
20220 GemmMicrokernelTester()
20221 .mr(1)
20222 .nr(4)
20223 .kr(8)
20224 .sr(1)
20225 .m(m)
20226 .n(n)
20227 .k(8)
20228 .iterations(1)
20229 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20230 }
20231 }
20232 }
20233
20234 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8_subtile_m) {
20235 TEST_REQUIRES_X86_SSE41;
20236 for (uint32_t m = 1; m <= 1; m++) {
20237 GemmMicrokernelTester()
20238 .mr(1)
20239 .nr(4)
20240 .kr(8)
20241 .sr(1)
20242 .m(m)
20243 .n(4)
20244 .k(8)
20245 .iterations(1)
20246 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20247 }
20248 }
20249
20250 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8_subtile_n) {
20251 TEST_REQUIRES_X86_SSE41;
20252 for (uint32_t n = 1; n <= 4; n++) {
20253 GemmMicrokernelTester()
20254 .mr(1)
20255 .nr(4)
20256 .kr(8)
20257 .sr(1)
20258 .m(1)
20259 .n(n)
20260 .k(8)
20261 .iterations(1)
20262 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20263 }
20264 }
20265
20266 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_lt_8) {
20267 TEST_REQUIRES_X86_SSE41;
20268 for (size_t k = 1; k < 8; k++) {
20269 GemmMicrokernelTester()
20270 .mr(1)
20271 .nr(4)
20272 .kr(8)
20273 .sr(1)
20274 .m(1)
20275 .n(4)
20276 .k(k)
20277 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20278 }
20279 }
20280
20281 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_lt_8_subtile) {
20282 TEST_REQUIRES_X86_SSE41;
20283 for (size_t k = 1; k < 8; k++) {
20284 for (uint32_t m = 1; m <= 1; m++) {
20285 for (uint32_t n = 1; n <= 4; n++) {
20286 GemmMicrokernelTester()
20287 .mr(1)
20288 .nr(4)
20289 .kr(8)
20290 .sr(1)
20291 .m(m)
20292 .n(n)
20293 .k(k)
20294 .iterations(1)
20295 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20296 }
20297 }
20298 }
20299 }
20300
20301 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_gt_8) {
20302 TEST_REQUIRES_X86_SSE41;
20303 for (size_t k = 9; k < 16; k++) {
20304 GemmMicrokernelTester()
20305 .mr(1)
20306 .nr(4)
20307 .kr(8)
20308 .sr(1)
20309 .m(1)
20310 .n(4)
20311 .k(k)
20312 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20313 }
20314 }
20315
20316 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_gt_8_subtile) {
20317 TEST_REQUIRES_X86_SSE41;
20318 for (size_t k = 9; k < 16; k++) {
20319 for (uint32_t m = 1; m <= 1; m++) {
20320 for (uint32_t n = 1; n <= 4; n++) {
20321 GemmMicrokernelTester()
20322 .mr(1)
20323 .nr(4)
20324 .kr(8)
20325 .sr(1)
20326 .m(m)
20327 .n(n)
20328 .k(k)
20329 .iterations(1)
20330 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20331 }
20332 }
20333 }
20334 }
20335
20336 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_div_8) {
20337 TEST_REQUIRES_X86_SSE41;
20338 for (size_t k = 16; k <= 80; k += 8) {
20339 GemmMicrokernelTester()
20340 .mr(1)
20341 .nr(4)
20342 .kr(8)
20343 .sr(1)
20344 .m(1)
20345 .n(4)
20346 .k(k)
20347 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20348 }
20349 }
20350
20351 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_div_8_subtile) {
20352 TEST_REQUIRES_X86_SSE41;
20353 for (size_t k = 16; k <= 80; k += 8) {
20354 for (uint32_t m = 1; m <= 1; m++) {
20355 for (uint32_t n = 1; n <= 4; n++) {
20356 GemmMicrokernelTester()
20357 .mr(1)
20358 .nr(4)
20359 .kr(8)
20360 .sr(1)
20361 .m(m)
20362 .n(n)
20363 .k(k)
20364 .iterations(1)
20365 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20366 }
20367 }
20368 }
20369 }
20370
20371 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4) {
20372 TEST_REQUIRES_X86_SSE41;
20373 for (uint32_t n = 5; n < 8; n++) {
20374 for (size_t k = 1; k <= 40; k += 9) {
20375 GemmMicrokernelTester()
20376 .mr(1)
20377 .nr(4)
20378 .kr(8)
20379 .sr(1)
20380 .m(1)
20381 .n(4)
20382 .k(k)
20383 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20384 }
20385 }
20386 }
20387
20388 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4_strided_cn) {
20389 TEST_REQUIRES_X86_SSE41;
20390 for (uint32_t n = 5; n < 8; n++) {
20391 for (size_t k = 1; k <= 40; k += 9) {
20392 GemmMicrokernelTester()
20393 .mr(1)
20394 .nr(4)
20395 .kr(8)
20396 .sr(1)
20397 .m(1)
20398 .n(4)
20399 .k(k)
20400 .cn_stride(7)
20401 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20402 }
20403 }
20404 }
20405
20406 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4_subtile) {
20407 TEST_REQUIRES_X86_SSE41;
20408 for (uint32_t n = 5; n < 8; n++) {
20409 for (size_t k = 1; k <= 40; k += 9) {
20410 for (uint32_t m = 1; m <= 1; m++) {
20411 GemmMicrokernelTester()
20412 .mr(1)
20413 .nr(4)
20414 .kr(8)
20415 .sr(1)
20416 .m(m)
20417 .n(n)
20418 .k(k)
20419 .iterations(1)
20420 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20421 }
20422 }
20423 }
20424 }
20425
20426 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4) {
20427 TEST_REQUIRES_X86_SSE41;
20428 for (uint32_t n = 8; n <= 12; n += 4) {
20429 for (size_t k = 1; k <= 40; k += 9) {
20430 GemmMicrokernelTester()
20431 .mr(1)
20432 .nr(4)
20433 .kr(8)
20434 .sr(1)
20435 .m(1)
20436 .n(4)
20437 .k(k)
20438 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20439 }
20440 }
20441 }
20442
20443 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4_strided_cn) {
20444 TEST_REQUIRES_X86_SSE41;
20445 for (uint32_t n = 8; n <= 12; n += 4) {
20446 for (size_t k = 1; k <= 40; k += 9) {
20447 GemmMicrokernelTester()
20448 .mr(1)
20449 .nr(4)
20450 .kr(8)
20451 .sr(1)
20452 .m(1)
20453 .n(n)
20454 .k(k)
20455 .cn_stride(7)
20456 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20457 }
20458 }
20459 }
20460
20461 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4_subtile) {
20462 TEST_REQUIRES_X86_SSE41;
20463 for (uint32_t n = 8; n <= 12; n += 4) {
20464 for (size_t k = 1; k <= 40; k += 9) {
20465 for (uint32_t m = 1; m <= 1; m++) {
20466 GemmMicrokernelTester()
20467 .mr(1)
20468 .nr(4)
20469 .kr(8)
20470 .sr(1)
20471 .m(m)
20472 .n(n)
20473 .k(k)
20474 .iterations(1)
20475 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20476 }
20477 }
20478 }
20479 }
20480
20481 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, small_kernel) {
20482 TEST_REQUIRES_X86_SSE41;
20483 for (size_t k = 1; k <= 40; k += 9) {
20484 GemmMicrokernelTester()
20485 .mr(1)
20486 .nr(4)
20487 .kr(8)
20488 .sr(1)
20489 .m(1)
20490 .n(4)
20491 .k(k)
20492 .ks(3)
20493 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20494 }
20495 }
20496
20497 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, small_kernel_subtile) {
20498 TEST_REQUIRES_X86_SSE41;
20499 for (size_t k = 1; k <= 40; k += 9) {
20500 for (uint32_t m = 1; m <= 1; m++) {
20501 for (uint32_t n = 1; n <= 4; n++) {
20502 GemmMicrokernelTester()
20503 .mr(1)
20504 .nr(4)
20505 .kr(8)
20506 .sr(1)
20507 .m(m)
20508 .n(n)
20509 .k(k)
20510 .ks(3)
20511 .iterations(1)
20512 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20513 }
20514 }
20515 }
20516 }
20517
20518 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4_small_kernel) {
20519 TEST_REQUIRES_X86_SSE41;
20520 for (uint32_t n = 5; n < 8; n++) {
20521 for (size_t k = 1; k <= 40; k += 9) {
20522 GemmMicrokernelTester()
20523 .mr(1)
20524 .nr(4)
20525 .kr(8)
20526 .sr(1)
20527 .m(1)
20528 .n(4)
20529 .k(k)
20530 .ks(3)
20531 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20532 }
20533 }
20534 }
20535
20536 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4_small_kernel) {
20537 TEST_REQUIRES_X86_SSE41;
20538 for (uint32_t n = 8; n <= 12; n += 4) {
20539 for (size_t k = 1; k <= 40; k += 9) {
20540 GemmMicrokernelTester()
20541 .mr(1)
20542 .nr(4)
20543 .kr(8)
20544 .sr(1)
20545 .m(1)
20546 .n(4)
20547 .k(k)
20548 .ks(3)
20549 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20550 }
20551 }
20552 }
20553
20554 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, strided_cm_subtile) {
20555 TEST_REQUIRES_X86_SSE41;
20556 for (size_t k = 1; k <= 40; k += 9) {
20557 for (uint32_t m = 1; m <= 1; m++) {
20558 for (uint32_t n = 1; n <= 4; n++) {
20559 GemmMicrokernelTester()
20560 .mr(1)
20561 .nr(4)
20562 .kr(8)
20563 .sr(1)
20564 .m(m)
20565 .n(n)
20566 .k(k)
20567 .cm_stride(7)
20568 .iterations(1)
20569 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20570 }
20571 }
20572 }
20573 }
20574
20575 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, a_offset) {
20576 TEST_REQUIRES_X86_SSE41;
20577 for (size_t k = 1; k <= 40; k += 9) {
20578 GemmMicrokernelTester()
20579 .mr(1)
20580 .nr(4)
20581 .kr(8)
20582 .sr(1)
20583 .m(1)
20584 .n(4)
20585 .k(k)
20586 .ks(3)
20587 .a_offset(43)
20588 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20589 }
20590 }
20591
20592 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, zero) {
20593 TEST_REQUIRES_X86_SSE41;
20594 for (uint32_t mz = 0; mz < 1; mz++) {
20595 for (size_t k = 1; k <= 40; k += 9) {
20596 GemmMicrokernelTester()
20597 .mr(1)
20598 .nr(4)
20599 .kr(8)
20600 .sr(1)
20601 .m(1)
20602 .n(4)
20603 .k(k)
20604 .ks(3)
20605 .a_offset(43)
20606 .zero_index(mz)
20607 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20608 }
20609 }
20610 }
20611
20612 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, qmin) {
20613 TEST_REQUIRES_X86_SSE41;
20614 GemmMicrokernelTester()
20615 .mr(1)
20616 .nr(4)
20617 .kr(8)
20618 .sr(1)
20619 .m(1)
20620 .n(4)
20621 .k(8)
20622 .qmin(128)
20623 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20624 }
20625
20626 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, qmax) {
20627 TEST_REQUIRES_X86_SSE41;
20628 GemmMicrokernelTester()
20629 .mr(1)
20630 .nr(4)
20631 .kr(8)
20632 .sr(1)
20633 .m(1)
20634 .n(4)
20635 .k(8)
20636 .qmax(128)
20637 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20638 }
20639
20640 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, strided_cm) {
20641 TEST_REQUIRES_X86_SSE41;
20642 GemmMicrokernelTester()
20643 .mr(1)
20644 .nr(4)
20645 .kr(8)
20646 .sr(1)
20647 .m(1)
20648 .n(4)
20649 .k(8)
20650 .cm_stride(7)
20651 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20652 }
20653
20654 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, no_a_zero_point) {
20655 TEST_REQUIRES_X86_SSE41;
20656 for (size_t k = 1; k <= 40; k += 9) {
20657 GemmMicrokernelTester()
20658 .mr(1)
20659 .nr(4)
20660 .kr(8)
20661 .sr(1)
20662 .m(1)
20663 .n(4)
20664 .k(k)
20665 .a_zero_point(0)
20666 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20667 }
20668 }
20669
20670 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, no_b_zero_point) {
20671 TEST_REQUIRES_X86_SSE41;
20672 for (size_t k = 1; k <= 40; k += 9) {
20673 GemmMicrokernelTester()
20674 .mr(1)
20675 .nr(4)
20676 .kr(8)
20677 .sr(1)
20678 .m(1)
20679 .n(4)
20680 .k(k)
20681 .b_zero_point(0)
20682 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20683 }
20684 }
20685
20686 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, no_zero_point) {
20687 TEST_REQUIRES_X86_SSE41;
20688 for (size_t k = 1; k <= 40; k += 9) {
20689 GemmMicrokernelTester()
20690 .mr(1)
20691 .nr(4)
20692 .kr(8)
20693 .sr(1)
20694 .m(1)
20695 .n(4)
20696 .k(k)
20697 .a_zero_point(0)
20698 .b_zero_point(0)
20699 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20700 }
20701 }
20702#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20703
20704
20705#if XNN_ARCH_X86 || XNN_ARCH_X86_64
20706 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8) {
20707 TEST_REQUIRES_X86_SSE41;
20708 GemmMicrokernelTester()
20709 .mr(2)
20710 .nr(4)
20711 .kr(8)
20712 .sr(1)
20713 .m(2)
20714 .n(4)
20715 .k(8)
20716 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20717 }
20718
20719 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, strided_cn) {
20720 TEST_REQUIRES_X86_SSE41;
20721 GemmMicrokernelTester()
20722 .mr(2)
20723 .nr(4)
20724 .kr(8)
20725 .sr(1)
20726 .m(2)
20727 .n(4)
20728 .k(8)
20729 .cn_stride(7)
20730 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20731 }
20732
20733 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_subtile) {
20734 TEST_REQUIRES_X86_SSE41;
20735 for (uint32_t m = 1; m <= 2; m++) {
20736 for (uint32_t n = 1; n <= 4; n++) {
20737 GemmMicrokernelTester()
20738 .mr(2)
20739 .nr(4)
20740 .kr(8)
20741 .sr(1)
20742 .m(m)
20743 .n(n)
20744 .k(8)
20745 .iterations(1)
20746 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20747 }
20748 }
20749 }
20750
20751 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_subtile_m) {
20752 TEST_REQUIRES_X86_SSE41;
20753 for (uint32_t m = 1; m <= 2; m++) {
20754 GemmMicrokernelTester()
20755 .mr(2)
20756 .nr(4)
20757 .kr(8)
20758 .sr(1)
20759 .m(m)
20760 .n(4)
20761 .k(8)
20762 .iterations(1)
20763 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20764 }
20765 }
20766
20767 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_subtile_n) {
20768 TEST_REQUIRES_X86_SSE41;
20769 for (uint32_t n = 1; n <= 4; n++) {
20770 GemmMicrokernelTester()
20771 .mr(2)
20772 .nr(4)
20773 .kr(8)
20774 .sr(1)
20775 .m(2)
20776 .n(n)
20777 .k(8)
20778 .iterations(1)
20779 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20780 }
20781 }
20782
20783 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_lt_8) {
20784 TEST_REQUIRES_X86_SSE41;
20785 for (size_t k = 1; k < 8; k++) {
20786 GemmMicrokernelTester()
20787 .mr(2)
20788 .nr(4)
20789 .kr(8)
20790 .sr(1)
20791 .m(2)
20792 .n(4)
20793 .k(k)
20794 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20795 }
20796 }
20797
20798 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_lt_8_subtile) {
20799 TEST_REQUIRES_X86_SSE41;
20800 for (size_t k = 1; k < 8; k++) {
20801 for (uint32_t m = 1; m <= 2; m++) {
20802 for (uint32_t n = 1; n <= 4; n++) {
20803 GemmMicrokernelTester()
20804 .mr(2)
20805 .nr(4)
20806 .kr(8)
20807 .sr(1)
20808 .m(m)
20809 .n(n)
20810 .k(k)
20811 .iterations(1)
20812 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20813 }
20814 }
20815 }
20816 }
20817
20818 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_gt_8) {
20819 TEST_REQUIRES_X86_SSE41;
20820 for (size_t k = 9; k < 16; k++) {
20821 GemmMicrokernelTester()
20822 .mr(2)
20823 .nr(4)
20824 .kr(8)
20825 .sr(1)
20826 .m(2)
20827 .n(4)
20828 .k(k)
20829 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20830 }
20831 }
20832
20833 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_gt_8_subtile) {
20834 TEST_REQUIRES_X86_SSE41;
20835 for (size_t k = 9; k < 16; k++) {
20836 for (uint32_t m = 1; m <= 2; m++) {
20837 for (uint32_t n = 1; n <= 4; n++) {
20838 GemmMicrokernelTester()
20839 .mr(2)
20840 .nr(4)
20841 .kr(8)
20842 .sr(1)
20843 .m(m)
20844 .n(n)
20845 .k(k)
20846 .iterations(1)
20847 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20848 }
20849 }
20850 }
20851 }
20852
20853 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_div_8) {
20854 TEST_REQUIRES_X86_SSE41;
20855 for (size_t k = 16; k <= 80; k += 8) {
20856 GemmMicrokernelTester()
20857 .mr(2)
20858 .nr(4)
20859 .kr(8)
20860 .sr(1)
20861 .m(2)
20862 .n(4)
20863 .k(k)
20864 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20865 }
20866 }
20867
20868 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_div_8_subtile) {
20869 TEST_REQUIRES_X86_SSE41;
20870 for (size_t k = 16; k <= 80; k += 8) {
20871 for (uint32_t m = 1; m <= 2; m++) {
20872 for (uint32_t n = 1; n <= 4; n++) {
20873 GemmMicrokernelTester()
20874 .mr(2)
20875 .nr(4)
20876 .kr(8)
20877 .sr(1)
20878 .m(m)
20879 .n(n)
20880 .k(k)
20881 .iterations(1)
20882 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20883 }
20884 }
20885 }
20886 }
20887
20888 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4) {
20889 TEST_REQUIRES_X86_SSE41;
20890 for (uint32_t n = 5; n < 8; n++) {
20891 for (size_t k = 1; k <= 40; k += 9) {
20892 GemmMicrokernelTester()
20893 .mr(2)
20894 .nr(4)
20895 .kr(8)
20896 .sr(1)
20897 .m(2)
20898 .n(4)
20899 .k(k)
20900 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20901 }
20902 }
20903 }
20904
20905 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4_strided_cn) {
20906 TEST_REQUIRES_X86_SSE41;
20907 for (uint32_t n = 5; n < 8; n++) {
20908 for (size_t k = 1; k <= 40; k += 9) {
20909 GemmMicrokernelTester()
20910 .mr(2)
20911 .nr(4)
20912 .kr(8)
20913 .sr(1)
20914 .m(2)
20915 .n(4)
20916 .k(k)
20917 .cn_stride(7)
20918 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20919 }
20920 }
20921 }
20922
20923 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4_subtile) {
20924 TEST_REQUIRES_X86_SSE41;
20925 for (uint32_t n = 5; n < 8; n++) {
20926 for (size_t k = 1; k <= 40; k += 9) {
20927 for (uint32_t m = 1; m <= 2; m++) {
20928 GemmMicrokernelTester()
20929 .mr(2)
20930 .nr(4)
20931 .kr(8)
20932 .sr(1)
20933 .m(m)
20934 .n(n)
20935 .k(k)
20936 .iterations(1)
20937 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20938 }
20939 }
20940 }
20941 }
20942
20943 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4) {
20944 TEST_REQUIRES_X86_SSE41;
20945 for (uint32_t n = 8; n <= 12; n += 4) {
20946 for (size_t k = 1; k <= 40; k += 9) {
20947 GemmMicrokernelTester()
20948 .mr(2)
20949 .nr(4)
20950 .kr(8)
20951 .sr(1)
20952 .m(2)
20953 .n(4)
20954 .k(k)
20955 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20956 }
20957 }
20958 }
20959
20960 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4_strided_cn) {
20961 TEST_REQUIRES_X86_SSE41;
20962 for (uint32_t n = 8; n <= 12; n += 4) {
20963 for (size_t k = 1; k <= 40; k += 9) {
20964 GemmMicrokernelTester()
20965 .mr(2)
20966 .nr(4)
20967 .kr(8)
20968 .sr(1)
20969 .m(2)
20970 .n(n)
20971 .k(k)
20972 .cn_stride(7)
20973 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20974 }
20975 }
20976 }
20977
20978 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4_subtile) {
20979 TEST_REQUIRES_X86_SSE41;
20980 for (uint32_t n = 8; n <= 12; n += 4) {
20981 for (size_t k = 1; k <= 40; k += 9) {
20982 for (uint32_t m = 1; m <= 2; m++) {
20983 GemmMicrokernelTester()
20984 .mr(2)
20985 .nr(4)
20986 .kr(8)
20987 .sr(1)
20988 .m(m)
20989 .n(n)
20990 .k(k)
20991 .iterations(1)
20992 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
20993 }
20994 }
20995 }
20996 }
20997
20998 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, small_kernel) {
20999 TEST_REQUIRES_X86_SSE41;
21000 for (size_t k = 1; k <= 40; k += 9) {
21001 GemmMicrokernelTester()
21002 .mr(2)
21003 .nr(4)
21004 .kr(8)
21005 .sr(1)
21006 .m(2)
21007 .n(4)
21008 .k(k)
21009 .ks(3)
21010 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21011 }
21012 }
21013
21014 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, small_kernel_subtile) {
21015 TEST_REQUIRES_X86_SSE41;
21016 for (size_t k = 1; k <= 40; k += 9) {
21017 for (uint32_t m = 1; m <= 2; m++) {
21018 for (uint32_t n = 1; n <= 4; n++) {
21019 GemmMicrokernelTester()
21020 .mr(2)
21021 .nr(4)
21022 .kr(8)
21023 .sr(1)
21024 .m(m)
21025 .n(n)
21026 .k(k)
21027 .ks(3)
21028 .iterations(1)
21029 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21030 }
21031 }
21032 }
21033 }
21034
21035 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4_small_kernel) {
21036 TEST_REQUIRES_X86_SSE41;
21037 for (uint32_t n = 5; n < 8; n++) {
21038 for (size_t k = 1; k <= 40; k += 9) {
21039 GemmMicrokernelTester()
21040 .mr(2)
21041 .nr(4)
21042 .kr(8)
21043 .sr(1)
21044 .m(2)
21045 .n(4)
21046 .k(k)
21047 .ks(3)
21048 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21049 }
21050 }
21051 }
21052
21053 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4_small_kernel) {
21054 TEST_REQUIRES_X86_SSE41;
21055 for (uint32_t n = 8; n <= 12; n += 4) {
21056 for (size_t k = 1; k <= 40; k += 9) {
21057 GemmMicrokernelTester()
21058 .mr(2)
21059 .nr(4)
21060 .kr(8)
21061 .sr(1)
21062 .m(2)
21063 .n(4)
21064 .k(k)
21065 .ks(3)
21066 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21067 }
21068 }
21069 }
21070
21071 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, strided_cm_subtile) {
21072 TEST_REQUIRES_X86_SSE41;
21073 for (size_t k = 1; k <= 40; k += 9) {
21074 for (uint32_t m = 1; m <= 2; m++) {
21075 for (uint32_t n = 1; n <= 4; n++) {
21076 GemmMicrokernelTester()
21077 .mr(2)
21078 .nr(4)
21079 .kr(8)
21080 .sr(1)
21081 .m(m)
21082 .n(n)
21083 .k(k)
21084 .cm_stride(7)
21085 .iterations(1)
21086 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21087 }
21088 }
21089 }
21090 }
21091
21092 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, a_offset) {
21093 TEST_REQUIRES_X86_SSE41;
21094 for (size_t k = 1; k <= 40; k += 9) {
21095 GemmMicrokernelTester()
21096 .mr(2)
21097 .nr(4)
21098 .kr(8)
21099 .sr(1)
21100 .m(2)
21101 .n(4)
21102 .k(k)
21103 .ks(3)
21104 .a_offset(83)
21105 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21106 }
21107 }
21108
21109 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, zero) {
21110 TEST_REQUIRES_X86_SSE41;
21111 for (uint32_t mz = 0; mz < 2; mz++) {
21112 for (size_t k = 1; k <= 40; k += 9) {
21113 GemmMicrokernelTester()
21114 .mr(2)
21115 .nr(4)
21116 .kr(8)
21117 .sr(1)
21118 .m(2)
21119 .n(4)
21120 .k(k)
21121 .ks(3)
21122 .a_offset(83)
21123 .zero_index(mz)
21124 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21125 }
21126 }
21127 }
21128
21129 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, qmin) {
21130 TEST_REQUIRES_X86_SSE41;
21131 GemmMicrokernelTester()
21132 .mr(2)
21133 .nr(4)
21134 .kr(8)
21135 .sr(1)
21136 .m(2)
21137 .n(4)
21138 .k(8)
21139 .qmin(128)
21140 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21141 }
21142
21143 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, qmax) {
21144 TEST_REQUIRES_X86_SSE41;
21145 GemmMicrokernelTester()
21146 .mr(2)
21147 .nr(4)
21148 .kr(8)
21149 .sr(1)
21150 .m(2)
21151 .n(4)
21152 .k(8)
21153 .qmax(128)
21154 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21155 }
21156
21157 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, strided_cm) {
21158 TEST_REQUIRES_X86_SSE41;
21159 GemmMicrokernelTester()
21160 .mr(2)
21161 .nr(4)
21162 .kr(8)
21163 .sr(1)
21164 .m(2)
21165 .n(4)
21166 .k(8)
21167 .cm_stride(7)
21168 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21169 }
21170
21171 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, no_a_zero_point) {
21172 TEST_REQUIRES_X86_SSE41;
21173 for (size_t k = 1; k <= 40; k += 9) {
21174 GemmMicrokernelTester()
21175 .mr(2)
21176 .nr(4)
21177 .kr(8)
21178 .sr(1)
21179 .m(2)
21180 .n(4)
21181 .k(k)
21182 .a_zero_point(0)
21183 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21184 }
21185 }
21186
21187 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, no_b_zero_point) {
21188 TEST_REQUIRES_X86_SSE41;
21189 for (size_t k = 1; k <= 40; k += 9) {
21190 GemmMicrokernelTester()
21191 .mr(2)
21192 .nr(4)
21193 .kr(8)
21194 .sr(1)
21195 .m(2)
21196 .n(4)
21197 .k(k)
21198 .b_zero_point(0)
21199 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21200 }
21201 }
21202
21203 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, no_zero_point) {
21204 TEST_REQUIRES_X86_SSE41;
21205 for (size_t k = 1; k <= 40; k += 9) {
21206 GemmMicrokernelTester()
21207 .mr(2)
21208 .nr(4)
21209 .kr(8)
21210 .sr(1)
21211 .m(2)
21212 .n(4)
21213 .k(k)
21214 .a_zero_point(0)
21215 .b_zero_point(0)
21216 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21217 }
21218 }
21219#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21220
21221
21222#if XNN_ARCH_X86 || XNN_ARCH_X86_64
21223 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8) {
21224 TEST_REQUIRES_X86_SSE41;
21225 GemmMicrokernelTester()
21226 .mr(3)
21227 .nr(4)
21228 .kr(8)
21229 .sr(1)
21230 .m(3)
21231 .n(4)
21232 .k(8)
21233 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21234 }
21235
21236 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cn) {
21237 TEST_REQUIRES_X86_SSE41;
21238 GemmMicrokernelTester()
21239 .mr(3)
21240 .nr(4)
21241 .kr(8)
21242 .sr(1)
21243 .m(3)
21244 .n(4)
21245 .k(8)
21246 .cn_stride(7)
21247 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21248 }
21249
21250 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile) {
21251 TEST_REQUIRES_X86_SSE41;
21252 for (uint32_t m = 1; m <= 3; m++) {
21253 for (uint32_t n = 1; n <= 4; n++) {
21254 GemmMicrokernelTester()
21255 .mr(3)
21256 .nr(4)
21257 .kr(8)
21258 .sr(1)
21259 .m(m)
21260 .n(n)
21261 .k(8)
21262 .iterations(1)
21263 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21264 }
21265 }
21266 }
21267
21268 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile_m) {
21269 TEST_REQUIRES_X86_SSE41;
21270 for (uint32_t m = 1; m <= 3; m++) {
21271 GemmMicrokernelTester()
21272 .mr(3)
21273 .nr(4)
21274 .kr(8)
21275 .sr(1)
21276 .m(m)
21277 .n(4)
21278 .k(8)
21279 .iterations(1)
21280 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21281 }
21282 }
21283
21284 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile_n) {
21285 TEST_REQUIRES_X86_SSE41;
21286 for (uint32_t n = 1; n <= 4; n++) {
21287 GemmMicrokernelTester()
21288 .mr(3)
21289 .nr(4)
21290 .kr(8)
21291 .sr(1)
21292 .m(3)
21293 .n(n)
21294 .k(8)
21295 .iterations(1)
21296 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21297 }
21298 }
21299
21300 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_lt_8) {
21301 TEST_REQUIRES_X86_SSE41;
21302 for (size_t k = 1; k < 8; k++) {
21303 GemmMicrokernelTester()
21304 .mr(3)
21305 .nr(4)
21306 .kr(8)
21307 .sr(1)
21308 .m(3)
21309 .n(4)
21310 .k(k)
21311 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21312 }
21313 }
21314
21315 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_lt_8_subtile) {
21316 TEST_REQUIRES_X86_SSE41;
21317 for (size_t k = 1; k < 8; k++) {
21318 for (uint32_t m = 1; m <= 3; m++) {
21319 for (uint32_t n = 1; n <= 4; n++) {
21320 GemmMicrokernelTester()
21321 .mr(3)
21322 .nr(4)
21323 .kr(8)
21324 .sr(1)
21325 .m(m)
21326 .n(n)
21327 .k(k)
21328 .iterations(1)
21329 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21330 }
21331 }
21332 }
21333 }
21334
21335 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_gt_8) {
21336 TEST_REQUIRES_X86_SSE41;
21337 for (size_t k = 9; k < 16; k++) {
21338 GemmMicrokernelTester()
21339 .mr(3)
21340 .nr(4)
21341 .kr(8)
21342 .sr(1)
21343 .m(3)
21344 .n(4)
21345 .k(k)
21346 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21347 }
21348 }
21349
21350 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_gt_8_subtile) {
21351 TEST_REQUIRES_X86_SSE41;
21352 for (size_t k = 9; k < 16; k++) {
21353 for (uint32_t m = 1; m <= 3; m++) {
21354 for (uint32_t n = 1; n <= 4; n++) {
21355 GemmMicrokernelTester()
21356 .mr(3)
21357 .nr(4)
21358 .kr(8)
21359 .sr(1)
21360 .m(m)
21361 .n(n)
21362 .k(k)
21363 .iterations(1)
21364 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21365 }
21366 }
21367 }
21368 }
21369
21370 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_div_8) {
21371 TEST_REQUIRES_X86_SSE41;
21372 for (size_t k = 16; k <= 80; k += 8) {
21373 GemmMicrokernelTester()
21374 .mr(3)
21375 .nr(4)
21376 .kr(8)
21377 .sr(1)
21378 .m(3)
21379 .n(4)
21380 .k(k)
21381 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21382 }
21383 }
21384
21385 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_div_8_subtile) {
21386 TEST_REQUIRES_X86_SSE41;
21387 for (size_t k = 16; k <= 80; k += 8) {
21388 for (uint32_t m = 1; m <= 3; m++) {
21389 for (uint32_t n = 1; n <= 4; n++) {
21390 GemmMicrokernelTester()
21391 .mr(3)
21392 .nr(4)
21393 .kr(8)
21394 .sr(1)
21395 .m(m)
21396 .n(n)
21397 .k(k)
21398 .iterations(1)
21399 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21400 }
21401 }
21402 }
21403 }
21404
21405 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4) {
21406 TEST_REQUIRES_X86_SSE41;
21407 for (uint32_t n = 5; n < 8; n++) {
21408 for (size_t k = 1; k <= 40; k += 9) {
21409 GemmMicrokernelTester()
21410 .mr(3)
21411 .nr(4)
21412 .kr(8)
21413 .sr(1)
21414 .m(3)
21415 .n(4)
21416 .k(k)
21417 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21418 }
21419 }
21420 }
21421
21422 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_strided_cn) {
21423 TEST_REQUIRES_X86_SSE41;
21424 for (uint32_t n = 5; n < 8; n++) {
21425 for (size_t k = 1; k <= 40; k += 9) {
21426 GemmMicrokernelTester()
21427 .mr(3)
21428 .nr(4)
21429 .kr(8)
21430 .sr(1)
21431 .m(3)
21432 .n(4)
21433 .k(k)
21434 .cn_stride(7)
21435 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21436 }
21437 }
21438 }
21439
21440 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_subtile) {
21441 TEST_REQUIRES_X86_SSE41;
21442 for (uint32_t n = 5; n < 8; n++) {
21443 for (size_t k = 1; k <= 40; k += 9) {
21444 for (uint32_t m = 1; m <= 3; m++) {
21445 GemmMicrokernelTester()
21446 .mr(3)
21447 .nr(4)
21448 .kr(8)
21449 .sr(1)
21450 .m(m)
21451 .n(n)
21452 .k(k)
21453 .iterations(1)
21454 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21455 }
21456 }
21457 }
21458 }
21459
21460 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4) {
21461 TEST_REQUIRES_X86_SSE41;
21462 for (uint32_t n = 8; n <= 12; n += 4) {
21463 for (size_t k = 1; k <= 40; k += 9) {
21464 GemmMicrokernelTester()
21465 .mr(3)
21466 .nr(4)
21467 .kr(8)
21468 .sr(1)
21469 .m(3)
21470 .n(4)
21471 .k(k)
21472 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21473 }
21474 }
21475 }
21476
21477 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_strided_cn) {
21478 TEST_REQUIRES_X86_SSE41;
21479 for (uint32_t n = 8; n <= 12; n += 4) {
21480 for (size_t k = 1; k <= 40; k += 9) {
21481 GemmMicrokernelTester()
21482 .mr(3)
21483 .nr(4)
21484 .kr(8)
21485 .sr(1)
21486 .m(3)
21487 .n(n)
21488 .k(k)
21489 .cn_stride(7)
21490 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21491 }
21492 }
21493 }
21494
21495 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_subtile) {
21496 TEST_REQUIRES_X86_SSE41;
21497 for (uint32_t n = 8; n <= 12; n += 4) {
21498 for (size_t k = 1; k <= 40; k += 9) {
21499 for (uint32_t m = 1; m <= 3; m++) {
21500 GemmMicrokernelTester()
21501 .mr(3)
21502 .nr(4)
21503 .kr(8)
21504 .sr(1)
21505 .m(m)
21506 .n(n)
21507 .k(k)
21508 .iterations(1)
21509 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21510 }
21511 }
21512 }
21513 }
21514
21515 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, small_kernel) {
21516 TEST_REQUIRES_X86_SSE41;
21517 for (size_t k = 1; k <= 40; k += 9) {
21518 GemmMicrokernelTester()
21519 .mr(3)
21520 .nr(4)
21521 .kr(8)
21522 .sr(1)
21523 .m(3)
21524 .n(4)
21525 .k(k)
21526 .ks(3)
21527 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21528 }
21529 }
21530
21531 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, small_kernel_subtile) {
21532 TEST_REQUIRES_X86_SSE41;
21533 for (size_t k = 1; k <= 40; k += 9) {
21534 for (uint32_t m = 1; m <= 3; m++) {
21535 for (uint32_t n = 1; n <= 4; n++) {
21536 GemmMicrokernelTester()
21537 .mr(3)
21538 .nr(4)
21539 .kr(8)
21540 .sr(1)
21541 .m(m)
21542 .n(n)
21543 .k(k)
21544 .ks(3)
21545 .iterations(1)
21546 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21547 }
21548 }
21549 }
21550 }
21551
21552 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_small_kernel) {
21553 TEST_REQUIRES_X86_SSE41;
21554 for (uint32_t n = 5; n < 8; n++) {
21555 for (size_t k = 1; k <= 40; k += 9) {
21556 GemmMicrokernelTester()
21557 .mr(3)
21558 .nr(4)
21559 .kr(8)
21560 .sr(1)
21561 .m(3)
21562 .n(4)
21563 .k(k)
21564 .ks(3)
21565 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21566 }
21567 }
21568 }
21569
21570 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_small_kernel) {
21571 TEST_REQUIRES_X86_SSE41;
21572 for (uint32_t n = 8; n <= 12; n += 4) {
21573 for (size_t k = 1; k <= 40; k += 9) {
21574 GemmMicrokernelTester()
21575 .mr(3)
21576 .nr(4)
21577 .kr(8)
21578 .sr(1)
21579 .m(3)
21580 .n(4)
21581 .k(k)
21582 .ks(3)
21583 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21584 }
21585 }
21586 }
21587
21588 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cm_subtile) {
21589 TEST_REQUIRES_X86_SSE41;
21590 for (size_t k = 1; k <= 40; k += 9) {
21591 for (uint32_t m = 1; m <= 3; m++) {
21592 for (uint32_t n = 1; n <= 4; n++) {
21593 GemmMicrokernelTester()
21594 .mr(3)
21595 .nr(4)
21596 .kr(8)
21597 .sr(1)
21598 .m(m)
21599 .n(n)
21600 .k(k)
21601 .cm_stride(7)
21602 .iterations(1)
21603 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21604 }
21605 }
21606 }
21607 }
21608
21609 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, a_offset) {
21610 TEST_REQUIRES_X86_SSE41;
21611 for (size_t k = 1; k <= 40; k += 9) {
21612 GemmMicrokernelTester()
21613 .mr(3)
21614 .nr(4)
21615 .kr(8)
21616 .sr(1)
21617 .m(3)
21618 .n(4)
21619 .k(k)
21620 .ks(3)
21621 .a_offset(127)
21622 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21623 }
21624 }
21625
21626 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, zero) {
21627 TEST_REQUIRES_X86_SSE41;
21628 for (uint32_t mz = 0; mz < 3; mz++) {
21629 for (size_t k = 1; k <= 40; k += 9) {
21630 GemmMicrokernelTester()
21631 .mr(3)
21632 .nr(4)
21633 .kr(8)
21634 .sr(1)
21635 .m(3)
21636 .n(4)
21637 .k(k)
21638 .ks(3)
21639 .a_offset(127)
21640 .zero_index(mz)
21641 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21642 }
21643 }
21644 }
21645
21646 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, qmin) {
21647 TEST_REQUIRES_X86_SSE41;
21648 GemmMicrokernelTester()
21649 .mr(3)
21650 .nr(4)
21651 .kr(8)
21652 .sr(1)
21653 .m(3)
21654 .n(4)
21655 .k(8)
21656 .qmin(128)
21657 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21658 }
21659
21660 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, qmax) {
21661 TEST_REQUIRES_X86_SSE41;
21662 GemmMicrokernelTester()
21663 .mr(3)
21664 .nr(4)
21665 .kr(8)
21666 .sr(1)
21667 .m(3)
21668 .n(4)
21669 .k(8)
21670 .qmax(128)
21671 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21672 }
21673
21674 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cm) {
21675 TEST_REQUIRES_X86_SSE41;
21676 GemmMicrokernelTester()
21677 .mr(3)
21678 .nr(4)
21679 .kr(8)
21680 .sr(1)
21681 .m(3)
21682 .n(4)
21683 .k(8)
21684 .cm_stride(7)
21685 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21686 }
21687
21688 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, no_a_zero_point) {
21689 TEST_REQUIRES_X86_SSE41;
21690 for (size_t k = 1; k <= 40; k += 9) {
21691 GemmMicrokernelTester()
21692 .mr(3)
21693 .nr(4)
21694 .kr(8)
21695 .sr(1)
21696 .m(3)
21697 .n(4)
21698 .k(k)
21699 .a_zero_point(0)
21700 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21701 }
21702 }
21703
21704 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, no_b_zero_point) {
21705 TEST_REQUIRES_X86_SSE41;
21706 for (size_t k = 1; k <= 40; k += 9) {
21707 GemmMicrokernelTester()
21708 .mr(3)
21709 .nr(4)
21710 .kr(8)
21711 .sr(1)
21712 .m(3)
21713 .n(4)
21714 .k(k)
21715 .b_zero_point(0)
21716 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21717 }
21718 }
21719
21720 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, no_zero_point) {
21721 TEST_REQUIRES_X86_SSE41;
21722 for (size_t k = 1; k <= 40; k += 9) {
21723 GemmMicrokernelTester()
21724 .mr(3)
21725 .nr(4)
21726 .kr(8)
21727 .sr(1)
21728 .m(3)
21729 .n(4)
21730 .k(k)
21731 .a_zero_point(0)
21732 .b_zero_point(0)
21733 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21734 }
21735 }
21736#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21737
21738
21739#if XNN_ARCH_X86 || XNN_ARCH_X86_64
21740 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_eq_8) {
21741 TEST_REQUIRES_X86_AVX;
21742 GemmMicrokernelTester()
21743 .mr(1)
21744 .nr(4)
21745 .kr(8)
21746 .sr(1)
21747 .m(1)
21748 .n(4)
21749 .k(8)
21750 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21751 }
21752
21753 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, strided_cn) {
21754 TEST_REQUIRES_X86_AVX;
21755 GemmMicrokernelTester()
21756 .mr(1)
21757 .nr(4)
21758 .kr(8)
21759 .sr(1)
21760 .m(1)
21761 .n(4)
21762 .k(8)
21763 .cn_stride(7)
21764 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21765 }
21766
21767 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_eq_8_subtile) {
21768 TEST_REQUIRES_X86_AVX;
21769 for (uint32_t m = 1; m <= 1; m++) {
21770 for (uint32_t n = 1; n <= 4; n++) {
21771 GemmMicrokernelTester()
21772 .mr(1)
21773 .nr(4)
21774 .kr(8)
21775 .sr(1)
21776 .m(m)
21777 .n(n)
21778 .k(8)
21779 .iterations(1)
21780 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21781 }
21782 }
21783 }
21784
21785 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_eq_8_subtile_m) {
21786 TEST_REQUIRES_X86_AVX;
21787 for (uint32_t m = 1; m <= 1; m++) {
21788 GemmMicrokernelTester()
21789 .mr(1)
21790 .nr(4)
21791 .kr(8)
21792 .sr(1)
21793 .m(m)
21794 .n(4)
21795 .k(8)
21796 .iterations(1)
21797 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21798 }
21799 }
21800
21801 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_eq_8_subtile_n) {
21802 TEST_REQUIRES_X86_AVX;
21803 for (uint32_t n = 1; n <= 4; n++) {
21804 GemmMicrokernelTester()
21805 .mr(1)
21806 .nr(4)
21807 .kr(8)
21808 .sr(1)
21809 .m(1)
21810 .n(n)
21811 .k(8)
21812 .iterations(1)
21813 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21814 }
21815 }
21816
21817 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_lt_8) {
21818 TEST_REQUIRES_X86_AVX;
21819 for (size_t k = 1; k < 8; k++) {
21820 GemmMicrokernelTester()
21821 .mr(1)
21822 .nr(4)
21823 .kr(8)
21824 .sr(1)
21825 .m(1)
21826 .n(4)
21827 .k(k)
21828 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21829 }
21830 }
21831
21832 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_lt_8_subtile) {
21833 TEST_REQUIRES_X86_AVX;
21834 for (size_t k = 1; k < 8; k++) {
21835 for (uint32_t m = 1; m <= 1; m++) {
21836 for (uint32_t n = 1; n <= 4; n++) {
21837 GemmMicrokernelTester()
21838 .mr(1)
21839 .nr(4)
21840 .kr(8)
21841 .sr(1)
21842 .m(m)
21843 .n(n)
21844 .k(k)
21845 .iterations(1)
21846 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21847 }
21848 }
21849 }
21850 }
21851
21852 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_gt_8) {
21853 TEST_REQUIRES_X86_AVX;
21854 for (size_t k = 9; k < 16; k++) {
21855 GemmMicrokernelTester()
21856 .mr(1)
21857 .nr(4)
21858 .kr(8)
21859 .sr(1)
21860 .m(1)
21861 .n(4)
21862 .k(k)
21863 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21864 }
21865 }
21866
21867 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_gt_8_subtile) {
21868 TEST_REQUIRES_X86_AVX;
21869 for (size_t k = 9; k < 16; k++) {
21870 for (uint32_t m = 1; m <= 1; m++) {
21871 for (uint32_t n = 1; n <= 4; n++) {
21872 GemmMicrokernelTester()
21873 .mr(1)
21874 .nr(4)
21875 .kr(8)
21876 .sr(1)
21877 .m(m)
21878 .n(n)
21879 .k(k)
21880 .iterations(1)
21881 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21882 }
21883 }
21884 }
21885 }
21886
21887 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_div_8) {
21888 TEST_REQUIRES_X86_AVX;
21889 for (size_t k = 16; k <= 80; k += 8) {
21890 GemmMicrokernelTester()
21891 .mr(1)
21892 .nr(4)
21893 .kr(8)
21894 .sr(1)
21895 .m(1)
21896 .n(4)
21897 .k(k)
21898 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21899 }
21900 }
21901
21902 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_div_8_subtile) {
21903 TEST_REQUIRES_X86_AVX;
21904 for (size_t k = 16; k <= 80; k += 8) {
21905 for (uint32_t m = 1; m <= 1; m++) {
21906 for (uint32_t n = 1; n <= 4; n++) {
21907 GemmMicrokernelTester()
21908 .mr(1)
21909 .nr(4)
21910 .kr(8)
21911 .sr(1)
21912 .m(m)
21913 .n(n)
21914 .k(k)
21915 .iterations(1)
21916 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21917 }
21918 }
21919 }
21920 }
21921
21922 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_gt_4) {
21923 TEST_REQUIRES_X86_AVX;
21924 for (uint32_t n = 5; n < 8; n++) {
21925 for (size_t k = 1; k <= 40; k += 9) {
21926 GemmMicrokernelTester()
21927 .mr(1)
21928 .nr(4)
21929 .kr(8)
21930 .sr(1)
21931 .m(1)
21932 .n(4)
21933 .k(k)
21934 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21935 }
21936 }
21937 }
21938
21939 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_gt_4_strided_cn) {
21940 TEST_REQUIRES_X86_AVX;
21941 for (uint32_t n = 5; n < 8; n++) {
21942 for (size_t k = 1; k <= 40; k += 9) {
21943 GemmMicrokernelTester()
21944 .mr(1)
21945 .nr(4)
21946 .kr(8)
21947 .sr(1)
21948 .m(1)
21949 .n(4)
21950 .k(k)
21951 .cn_stride(7)
21952 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21953 }
21954 }
21955 }
21956
21957 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_gt_4_subtile) {
21958 TEST_REQUIRES_X86_AVX;
21959 for (uint32_t n = 5; n < 8; n++) {
21960 for (size_t k = 1; k <= 40; k += 9) {
21961 for (uint32_t m = 1; m <= 1; m++) {
21962 GemmMicrokernelTester()
21963 .mr(1)
21964 .nr(4)
21965 .kr(8)
21966 .sr(1)
21967 .m(m)
21968 .n(n)
21969 .k(k)
21970 .iterations(1)
21971 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21972 }
21973 }
21974 }
21975 }
21976
21977 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_div_4) {
21978 TEST_REQUIRES_X86_AVX;
21979 for (uint32_t n = 8; n <= 12; n += 4) {
21980 for (size_t k = 1; k <= 40; k += 9) {
21981 GemmMicrokernelTester()
21982 .mr(1)
21983 .nr(4)
21984 .kr(8)
21985 .sr(1)
21986 .m(1)
21987 .n(4)
21988 .k(k)
21989 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
21990 }
21991 }
21992 }
21993
21994 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_div_4_strided_cn) {
21995 TEST_REQUIRES_X86_AVX;
21996 for (uint32_t n = 8; n <= 12; n += 4) {
21997 for (size_t k = 1; k <= 40; k += 9) {
21998 GemmMicrokernelTester()
21999 .mr(1)
22000 .nr(4)
22001 .kr(8)
22002 .sr(1)
22003 .m(1)
22004 .n(n)
22005 .k(k)
22006 .cn_stride(7)
22007 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22008 }
22009 }
22010 }
22011
22012 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_div_4_subtile) {
22013 TEST_REQUIRES_X86_AVX;
22014 for (uint32_t n = 8; n <= 12; n += 4) {
22015 for (size_t k = 1; k <= 40; k += 9) {
22016 for (uint32_t m = 1; m <= 1; m++) {
22017 GemmMicrokernelTester()
22018 .mr(1)
22019 .nr(4)
22020 .kr(8)
22021 .sr(1)
22022 .m(m)
22023 .n(n)
22024 .k(k)
22025 .iterations(1)
22026 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22027 }
22028 }
22029 }
22030 }
22031
22032 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, small_kernel) {
22033 TEST_REQUIRES_X86_AVX;
22034 for (size_t k = 1; k <= 40; k += 9) {
22035 GemmMicrokernelTester()
22036 .mr(1)
22037 .nr(4)
22038 .kr(8)
22039 .sr(1)
22040 .m(1)
22041 .n(4)
22042 .k(k)
22043 .ks(3)
22044 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22045 }
22046 }
22047
22048 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, small_kernel_subtile) {
22049 TEST_REQUIRES_X86_AVX;
22050 for (size_t k = 1; k <= 40; k += 9) {
22051 for (uint32_t m = 1; m <= 1; m++) {
22052 for (uint32_t n = 1; n <= 4; n++) {
22053 GemmMicrokernelTester()
22054 .mr(1)
22055 .nr(4)
22056 .kr(8)
22057 .sr(1)
22058 .m(m)
22059 .n(n)
22060 .k(k)
22061 .ks(3)
22062 .iterations(1)
22063 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22064 }
22065 }
22066 }
22067 }
22068
22069 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_gt_4_small_kernel) {
22070 TEST_REQUIRES_X86_AVX;
22071 for (uint32_t n = 5; n < 8; n++) {
22072 for (size_t k = 1; k <= 40; k += 9) {
22073 GemmMicrokernelTester()
22074 .mr(1)
22075 .nr(4)
22076 .kr(8)
22077 .sr(1)
22078 .m(1)
22079 .n(4)
22080 .k(k)
22081 .ks(3)
22082 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22083 }
22084 }
22085 }
22086
22087 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_div_4_small_kernel) {
22088 TEST_REQUIRES_X86_AVX;
22089 for (uint32_t n = 8; n <= 12; n += 4) {
22090 for (size_t k = 1; k <= 40; k += 9) {
22091 GemmMicrokernelTester()
22092 .mr(1)
22093 .nr(4)
22094 .kr(8)
22095 .sr(1)
22096 .m(1)
22097 .n(4)
22098 .k(k)
22099 .ks(3)
22100 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22101 }
22102 }
22103 }
22104
22105 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, strided_cm_subtile) {
22106 TEST_REQUIRES_X86_AVX;
22107 for (size_t k = 1; k <= 40; k += 9) {
22108 for (uint32_t m = 1; m <= 1; m++) {
22109 for (uint32_t n = 1; n <= 4; n++) {
22110 GemmMicrokernelTester()
22111 .mr(1)
22112 .nr(4)
22113 .kr(8)
22114 .sr(1)
22115 .m(m)
22116 .n(n)
22117 .k(k)
22118 .cm_stride(7)
22119 .iterations(1)
22120 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22121 }
22122 }
22123 }
22124 }
22125
22126 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, a_offset) {
22127 TEST_REQUIRES_X86_AVX;
22128 for (size_t k = 1; k <= 40; k += 9) {
22129 GemmMicrokernelTester()
22130 .mr(1)
22131 .nr(4)
22132 .kr(8)
22133 .sr(1)
22134 .m(1)
22135 .n(4)
22136 .k(k)
22137 .ks(3)
22138 .a_offset(43)
22139 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22140 }
22141 }
22142
22143 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, zero) {
22144 TEST_REQUIRES_X86_AVX;
22145 for (uint32_t mz = 0; mz < 1; mz++) {
22146 for (size_t k = 1; k <= 40; k += 9) {
22147 GemmMicrokernelTester()
22148 .mr(1)
22149 .nr(4)
22150 .kr(8)
22151 .sr(1)
22152 .m(1)
22153 .n(4)
22154 .k(k)
22155 .ks(3)
22156 .a_offset(43)
22157 .zero_index(mz)
22158 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22159 }
22160 }
22161 }
22162
22163 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, qmin) {
22164 TEST_REQUIRES_X86_AVX;
22165 GemmMicrokernelTester()
22166 .mr(1)
22167 .nr(4)
22168 .kr(8)
22169 .sr(1)
22170 .m(1)
22171 .n(4)
22172 .k(8)
22173 .qmin(128)
22174 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22175 }
22176
22177 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, qmax) {
22178 TEST_REQUIRES_X86_AVX;
22179 GemmMicrokernelTester()
22180 .mr(1)
22181 .nr(4)
22182 .kr(8)
22183 .sr(1)
22184 .m(1)
22185 .n(4)
22186 .k(8)
22187 .qmax(128)
22188 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22189 }
22190
22191 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, strided_cm) {
22192 TEST_REQUIRES_X86_AVX;
22193 GemmMicrokernelTester()
22194 .mr(1)
22195 .nr(4)
22196 .kr(8)
22197 .sr(1)
22198 .m(1)
22199 .n(4)
22200 .k(8)
22201 .cm_stride(7)
22202 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22203 }
22204
22205 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, no_a_zero_point) {
22206 TEST_REQUIRES_X86_AVX;
22207 for (size_t k = 1; k <= 40; k += 9) {
22208 GemmMicrokernelTester()
22209 .mr(1)
22210 .nr(4)
22211 .kr(8)
22212 .sr(1)
22213 .m(1)
22214 .n(4)
22215 .k(k)
22216 .a_zero_point(0)
22217 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22218 }
22219 }
22220
22221 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, no_b_zero_point) {
22222 TEST_REQUIRES_X86_AVX;
22223 for (size_t k = 1; k <= 40; k += 9) {
22224 GemmMicrokernelTester()
22225 .mr(1)
22226 .nr(4)
22227 .kr(8)
22228 .sr(1)
22229 .m(1)
22230 .n(4)
22231 .k(k)
22232 .b_zero_point(0)
22233 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22234 }
22235 }
22236
22237 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, no_zero_point) {
22238 TEST_REQUIRES_X86_AVX;
22239 for (size_t k = 1; k <= 40; k += 9) {
22240 GemmMicrokernelTester()
22241 .mr(1)
22242 .nr(4)
22243 .kr(8)
22244 .sr(1)
22245 .m(1)
22246 .n(4)
22247 .k(k)
22248 .a_zero_point(0)
22249 .b_zero_point(0)
22250 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22251 }
22252 }
22253#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22254
22255
22256#if XNN_ARCH_X86 || XNN_ARCH_X86_64
22257 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8) {
22258 TEST_REQUIRES_X86_AVX;
22259 GemmMicrokernelTester()
22260 .mr(2)
22261 .nr(4)
22262 .kr(8)
22263 .sr(1)
22264 .m(2)
22265 .n(4)
22266 .k(8)
22267 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22268 }
22269
22270 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cn) {
22271 TEST_REQUIRES_X86_AVX;
22272 GemmMicrokernelTester()
22273 .mr(2)
22274 .nr(4)
22275 .kr(8)
22276 .sr(1)
22277 .m(2)
22278 .n(4)
22279 .k(8)
22280 .cn_stride(7)
22281 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22282 }
22283
22284 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile) {
22285 TEST_REQUIRES_X86_AVX;
22286 for (uint32_t m = 1; m <= 2; m++) {
22287 for (uint32_t n = 1; n <= 4; n++) {
22288 GemmMicrokernelTester()
22289 .mr(2)
22290 .nr(4)
22291 .kr(8)
22292 .sr(1)
22293 .m(m)
22294 .n(n)
22295 .k(8)
22296 .iterations(1)
22297 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22298 }
22299 }
22300 }
22301
22302 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile_m) {
22303 TEST_REQUIRES_X86_AVX;
22304 for (uint32_t m = 1; m <= 2; m++) {
22305 GemmMicrokernelTester()
22306 .mr(2)
22307 .nr(4)
22308 .kr(8)
22309 .sr(1)
22310 .m(m)
22311 .n(4)
22312 .k(8)
22313 .iterations(1)
22314 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22315 }
22316 }
22317
22318 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile_n) {
22319 TEST_REQUIRES_X86_AVX;
22320 for (uint32_t n = 1; n <= 4; n++) {
22321 GemmMicrokernelTester()
22322 .mr(2)
22323 .nr(4)
22324 .kr(8)
22325 .sr(1)
22326 .m(2)
22327 .n(n)
22328 .k(8)
22329 .iterations(1)
22330 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22331 }
22332 }
22333
22334 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8) {
22335 TEST_REQUIRES_X86_AVX;
22336 for (size_t k = 1; k < 8; k++) {
22337 GemmMicrokernelTester()
22338 .mr(2)
22339 .nr(4)
22340 .kr(8)
22341 .sr(1)
22342 .m(2)
22343 .n(4)
22344 .k(k)
22345 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22346 }
22347 }
22348
22349 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8_subtile) {
22350 TEST_REQUIRES_X86_AVX;
22351 for (size_t k = 1; k < 8; k++) {
22352 for (uint32_t m = 1; m <= 2; m++) {
22353 for (uint32_t n = 1; n <= 4; n++) {
22354 GemmMicrokernelTester()
22355 .mr(2)
22356 .nr(4)
22357 .kr(8)
22358 .sr(1)
22359 .m(m)
22360 .n(n)
22361 .k(k)
22362 .iterations(1)
22363 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22364 }
22365 }
22366 }
22367 }
22368
22369 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8) {
22370 TEST_REQUIRES_X86_AVX;
22371 for (size_t k = 9; k < 16; k++) {
22372 GemmMicrokernelTester()
22373 .mr(2)
22374 .nr(4)
22375 .kr(8)
22376 .sr(1)
22377 .m(2)
22378 .n(4)
22379 .k(k)
22380 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22381 }
22382 }
22383
22384 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8_subtile) {
22385 TEST_REQUIRES_X86_AVX;
22386 for (size_t k = 9; k < 16; k++) {
22387 for (uint32_t m = 1; m <= 2; m++) {
22388 for (uint32_t n = 1; n <= 4; n++) {
22389 GemmMicrokernelTester()
22390 .mr(2)
22391 .nr(4)
22392 .kr(8)
22393 .sr(1)
22394 .m(m)
22395 .n(n)
22396 .k(k)
22397 .iterations(1)
22398 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22399 }
22400 }
22401 }
22402 }
22403
22404 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8) {
22405 TEST_REQUIRES_X86_AVX;
22406 for (size_t k = 16; k <= 80; k += 8) {
22407 GemmMicrokernelTester()
22408 .mr(2)
22409 .nr(4)
22410 .kr(8)
22411 .sr(1)
22412 .m(2)
22413 .n(4)
22414 .k(k)
22415 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22416 }
22417 }
22418
22419 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8_subtile) {
22420 TEST_REQUIRES_X86_AVX;
22421 for (size_t k = 16; k <= 80; k += 8) {
22422 for (uint32_t m = 1; m <= 2; m++) {
22423 for (uint32_t n = 1; n <= 4; n++) {
22424 GemmMicrokernelTester()
22425 .mr(2)
22426 .nr(4)
22427 .kr(8)
22428 .sr(1)
22429 .m(m)
22430 .n(n)
22431 .k(k)
22432 .iterations(1)
22433 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22434 }
22435 }
22436 }
22437 }
22438
22439 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4) {
22440 TEST_REQUIRES_X86_AVX;
22441 for (uint32_t n = 5; n < 8; n++) {
22442 for (size_t k = 1; k <= 40; k += 9) {
22443 GemmMicrokernelTester()
22444 .mr(2)
22445 .nr(4)
22446 .kr(8)
22447 .sr(1)
22448 .m(2)
22449 .n(4)
22450 .k(k)
22451 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22452 }
22453 }
22454 }
22455
22456 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_strided_cn) {
22457 TEST_REQUIRES_X86_AVX;
22458 for (uint32_t n = 5; n < 8; n++) {
22459 for (size_t k = 1; k <= 40; k += 9) {
22460 GemmMicrokernelTester()
22461 .mr(2)
22462 .nr(4)
22463 .kr(8)
22464 .sr(1)
22465 .m(2)
22466 .n(4)
22467 .k(k)
22468 .cn_stride(7)
22469 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22470 }
22471 }
22472 }
22473
22474 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_subtile) {
22475 TEST_REQUIRES_X86_AVX;
22476 for (uint32_t n = 5; n < 8; n++) {
22477 for (size_t k = 1; k <= 40; k += 9) {
22478 for (uint32_t m = 1; m <= 2; m++) {
22479 GemmMicrokernelTester()
22480 .mr(2)
22481 .nr(4)
22482 .kr(8)
22483 .sr(1)
22484 .m(m)
22485 .n(n)
22486 .k(k)
22487 .iterations(1)
22488 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22489 }
22490 }
22491 }
22492 }
22493
22494 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4) {
22495 TEST_REQUIRES_X86_AVX;
22496 for (uint32_t n = 8; n <= 12; n += 4) {
22497 for (size_t k = 1; k <= 40; k += 9) {
22498 GemmMicrokernelTester()
22499 .mr(2)
22500 .nr(4)
22501 .kr(8)
22502 .sr(1)
22503 .m(2)
22504 .n(4)
22505 .k(k)
22506 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22507 }
22508 }
22509 }
22510
22511 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_strided_cn) {
22512 TEST_REQUIRES_X86_AVX;
22513 for (uint32_t n = 8; n <= 12; n += 4) {
22514 for (size_t k = 1; k <= 40; k += 9) {
22515 GemmMicrokernelTester()
22516 .mr(2)
22517 .nr(4)
22518 .kr(8)
22519 .sr(1)
22520 .m(2)
22521 .n(n)
22522 .k(k)
22523 .cn_stride(7)
22524 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22525 }
22526 }
22527 }
22528
22529 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_subtile) {
22530 TEST_REQUIRES_X86_AVX;
22531 for (uint32_t n = 8; n <= 12; n += 4) {
22532 for (size_t k = 1; k <= 40; k += 9) {
22533 for (uint32_t m = 1; m <= 2; m++) {
22534 GemmMicrokernelTester()
22535 .mr(2)
22536 .nr(4)
22537 .kr(8)
22538 .sr(1)
22539 .m(m)
22540 .n(n)
22541 .k(k)
22542 .iterations(1)
22543 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22544 }
22545 }
22546 }
22547 }
22548
22549 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, small_kernel) {
22550 TEST_REQUIRES_X86_AVX;
22551 for (size_t k = 1; k <= 40; k += 9) {
22552 GemmMicrokernelTester()
22553 .mr(2)
22554 .nr(4)
22555 .kr(8)
22556 .sr(1)
22557 .m(2)
22558 .n(4)
22559 .k(k)
22560 .ks(3)
22561 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22562 }
22563 }
22564
22565 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, small_kernel_subtile) {
22566 TEST_REQUIRES_X86_AVX;
22567 for (size_t k = 1; k <= 40; k += 9) {
22568 for (uint32_t m = 1; m <= 2; m++) {
22569 for (uint32_t n = 1; n <= 4; n++) {
22570 GemmMicrokernelTester()
22571 .mr(2)
22572 .nr(4)
22573 .kr(8)
22574 .sr(1)
22575 .m(m)
22576 .n(n)
22577 .k(k)
22578 .ks(3)
22579 .iterations(1)
22580 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22581 }
22582 }
22583 }
22584 }
22585
22586 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_small_kernel) {
22587 TEST_REQUIRES_X86_AVX;
22588 for (uint32_t n = 5; n < 8; n++) {
22589 for (size_t k = 1; k <= 40; k += 9) {
22590 GemmMicrokernelTester()
22591 .mr(2)
22592 .nr(4)
22593 .kr(8)
22594 .sr(1)
22595 .m(2)
22596 .n(4)
22597 .k(k)
22598 .ks(3)
22599 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22600 }
22601 }
22602 }
22603
22604 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_small_kernel) {
22605 TEST_REQUIRES_X86_AVX;
22606 for (uint32_t n = 8; n <= 12; n += 4) {
22607 for (size_t k = 1; k <= 40; k += 9) {
22608 GemmMicrokernelTester()
22609 .mr(2)
22610 .nr(4)
22611 .kr(8)
22612 .sr(1)
22613 .m(2)
22614 .n(4)
22615 .k(k)
22616 .ks(3)
22617 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22618 }
22619 }
22620 }
22621
22622 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cm_subtile) {
22623 TEST_REQUIRES_X86_AVX;
22624 for (size_t k = 1; k <= 40; k += 9) {
22625 for (uint32_t m = 1; m <= 2; m++) {
22626 for (uint32_t n = 1; n <= 4; n++) {
22627 GemmMicrokernelTester()
22628 .mr(2)
22629 .nr(4)
22630 .kr(8)
22631 .sr(1)
22632 .m(m)
22633 .n(n)
22634 .k(k)
22635 .cm_stride(7)
22636 .iterations(1)
22637 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22638 }
22639 }
22640 }
22641 }
22642
22643 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, a_offset) {
22644 TEST_REQUIRES_X86_AVX;
22645 for (size_t k = 1; k <= 40; k += 9) {
22646 GemmMicrokernelTester()
22647 .mr(2)
22648 .nr(4)
22649 .kr(8)
22650 .sr(1)
22651 .m(2)
22652 .n(4)
22653 .k(k)
22654 .ks(3)
22655 .a_offset(83)
22656 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22657 }
22658 }
22659
22660 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, zero) {
22661 TEST_REQUIRES_X86_AVX;
22662 for (uint32_t mz = 0; mz < 2; mz++) {
22663 for (size_t k = 1; k <= 40; k += 9) {
22664 GemmMicrokernelTester()
22665 .mr(2)
22666 .nr(4)
22667 .kr(8)
22668 .sr(1)
22669 .m(2)
22670 .n(4)
22671 .k(k)
22672 .ks(3)
22673 .a_offset(83)
22674 .zero_index(mz)
22675 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22676 }
22677 }
22678 }
22679
22680 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, qmin) {
22681 TEST_REQUIRES_X86_AVX;
22682 GemmMicrokernelTester()
22683 .mr(2)
22684 .nr(4)
22685 .kr(8)
22686 .sr(1)
22687 .m(2)
22688 .n(4)
22689 .k(8)
22690 .qmin(128)
22691 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22692 }
22693
22694 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, qmax) {
22695 TEST_REQUIRES_X86_AVX;
22696 GemmMicrokernelTester()
22697 .mr(2)
22698 .nr(4)
22699 .kr(8)
22700 .sr(1)
22701 .m(2)
22702 .n(4)
22703 .k(8)
22704 .qmax(128)
22705 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22706 }
22707
22708 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cm) {
22709 TEST_REQUIRES_X86_AVX;
22710 GemmMicrokernelTester()
22711 .mr(2)
22712 .nr(4)
22713 .kr(8)
22714 .sr(1)
22715 .m(2)
22716 .n(4)
22717 .k(8)
22718 .cm_stride(7)
22719 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22720 }
22721
22722 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, no_a_zero_point) {
22723 TEST_REQUIRES_X86_AVX;
22724 for (size_t k = 1; k <= 40; k += 9) {
22725 GemmMicrokernelTester()
22726 .mr(2)
22727 .nr(4)
22728 .kr(8)
22729 .sr(1)
22730 .m(2)
22731 .n(4)
22732 .k(k)
22733 .a_zero_point(0)
22734 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22735 }
22736 }
22737
22738 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, no_b_zero_point) {
22739 TEST_REQUIRES_X86_AVX;
22740 for (size_t k = 1; k <= 40; k += 9) {
22741 GemmMicrokernelTester()
22742 .mr(2)
22743 .nr(4)
22744 .kr(8)
22745 .sr(1)
22746 .m(2)
22747 .n(4)
22748 .k(k)
22749 .b_zero_point(0)
22750 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22751 }
22752 }
22753
22754 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, no_zero_point) {
22755 TEST_REQUIRES_X86_AVX;
22756 for (size_t k = 1; k <= 40; k += 9) {
22757 GemmMicrokernelTester()
22758 .mr(2)
22759 .nr(4)
22760 .kr(8)
22761 .sr(1)
22762 .m(2)
22763 .n(4)
22764 .k(k)
22765 .a_zero_point(0)
22766 .b_zero_point(0)
22767 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22768 }
22769 }
22770#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22771
22772
22773#if XNN_ARCH_X86 || XNN_ARCH_X86_64
22774 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8) {
22775 TEST_REQUIRES_X86_AVX;
22776 GemmMicrokernelTester()
22777 .mr(3)
22778 .nr(4)
22779 .kr(8)
22780 .sr(1)
22781 .m(3)
22782 .n(4)
22783 .k(8)
22784 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22785 }
22786
22787 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cn) {
22788 TEST_REQUIRES_X86_AVX;
22789 GemmMicrokernelTester()
22790 .mr(3)
22791 .nr(4)
22792 .kr(8)
22793 .sr(1)
22794 .m(3)
22795 .n(4)
22796 .k(8)
22797 .cn_stride(7)
22798 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22799 }
22800
22801 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile) {
22802 TEST_REQUIRES_X86_AVX;
22803 for (uint32_t m = 1; m <= 3; m++) {
22804 for (uint32_t n = 1; n <= 4; n++) {
22805 GemmMicrokernelTester()
22806 .mr(3)
22807 .nr(4)
22808 .kr(8)
22809 .sr(1)
22810 .m(m)
22811 .n(n)
22812 .k(8)
22813 .iterations(1)
22814 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22815 }
22816 }
22817 }
22818
22819 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile_m) {
22820 TEST_REQUIRES_X86_AVX;
22821 for (uint32_t m = 1; m <= 3; m++) {
22822 GemmMicrokernelTester()
22823 .mr(3)
22824 .nr(4)
22825 .kr(8)
22826 .sr(1)
22827 .m(m)
22828 .n(4)
22829 .k(8)
22830 .iterations(1)
22831 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22832 }
22833 }
22834
22835 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile_n) {
22836 TEST_REQUIRES_X86_AVX;
22837 for (uint32_t n = 1; n <= 4; n++) {
22838 GemmMicrokernelTester()
22839 .mr(3)
22840 .nr(4)
22841 .kr(8)
22842 .sr(1)
22843 .m(3)
22844 .n(n)
22845 .k(8)
22846 .iterations(1)
22847 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22848 }
22849 }
22850
22851 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8) {
22852 TEST_REQUIRES_X86_AVX;
22853 for (size_t k = 1; k < 8; k++) {
22854 GemmMicrokernelTester()
22855 .mr(3)
22856 .nr(4)
22857 .kr(8)
22858 .sr(1)
22859 .m(3)
22860 .n(4)
22861 .k(k)
22862 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22863 }
22864 }
22865
22866 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8_subtile) {
22867 TEST_REQUIRES_X86_AVX;
22868 for (size_t k = 1; k < 8; k++) {
22869 for (uint32_t m = 1; m <= 3; m++) {
22870 for (uint32_t n = 1; n <= 4; n++) {
22871 GemmMicrokernelTester()
22872 .mr(3)
22873 .nr(4)
22874 .kr(8)
22875 .sr(1)
22876 .m(m)
22877 .n(n)
22878 .k(k)
22879 .iterations(1)
22880 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22881 }
22882 }
22883 }
22884 }
22885
22886 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8) {
22887 TEST_REQUIRES_X86_AVX;
22888 for (size_t k = 9; k < 16; k++) {
22889 GemmMicrokernelTester()
22890 .mr(3)
22891 .nr(4)
22892 .kr(8)
22893 .sr(1)
22894 .m(3)
22895 .n(4)
22896 .k(k)
22897 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22898 }
22899 }
22900
22901 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8_subtile) {
22902 TEST_REQUIRES_X86_AVX;
22903 for (size_t k = 9; k < 16; k++) {
22904 for (uint32_t m = 1; m <= 3; m++) {
22905 for (uint32_t n = 1; n <= 4; n++) {
22906 GemmMicrokernelTester()
22907 .mr(3)
22908 .nr(4)
22909 .kr(8)
22910 .sr(1)
22911 .m(m)
22912 .n(n)
22913 .k(k)
22914 .iterations(1)
22915 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22916 }
22917 }
22918 }
22919 }
22920
22921 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8) {
22922 TEST_REQUIRES_X86_AVX;
22923 for (size_t k = 16; k <= 80; k += 8) {
22924 GemmMicrokernelTester()
22925 .mr(3)
22926 .nr(4)
22927 .kr(8)
22928 .sr(1)
22929 .m(3)
22930 .n(4)
22931 .k(k)
22932 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22933 }
22934 }
22935
22936 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8_subtile) {
22937 TEST_REQUIRES_X86_AVX;
22938 for (size_t k = 16; k <= 80; k += 8) {
22939 for (uint32_t m = 1; m <= 3; m++) {
22940 for (uint32_t n = 1; n <= 4; n++) {
22941 GemmMicrokernelTester()
22942 .mr(3)
22943 .nr(4)
22944 .kr(8)
22945 .sr(1)
22946 .m(m)
22947 .n(n)
22948 .k(k)
22949 .iterations(1)
22950 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22951 }
22952 }
22953 }
22954 }
22955
22956 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4) {
22957 TEST_REQUIRES_X86_AVX;
22958 for (uint32_t n = 5; n < 8; n++) {
22959 for (size_t k = 1; k <= 40; k += 9) {
22960 GemmMicrokernelTester()
22961 .mr(3)
22962 .nr(4)
22963 .kr(8)
22964 .sr(1)
22965 .m(3)
22966 .n(4)
22967 .k(k)
22968 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22969 }
22970 }
22971 }
22972
22973 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_strided_cn) {
22974 TEST_REQUIRES_X86_AVX;
22975 for (uint32_t n = 5; n < 8; n++) {
22976 for (size_t k = 1; k <= 40; k += 9) {
22977 GemmMicrokernelTester()
22978 .mr(3)
22979 .nr(4)
22980 .kr(8)
22981 .sr(1)
22982 .m(3)
22983 .n(4)
22984 .k(k)
22985 .cn_stride(7)
22986 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
22987 }
22988 }
22989 }
22990
22991 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_subtile) {
22992 TEST_REQUIRES_X86_AVX;
22993 for (uint32_t n = 5; n < 8; n++) {
22994 for (size_t k = 1; k <= 40; k += 9) {
22995 for (uint32_t m = 1; m <= 3; m++) {
22996 GemmMicrokernelTester()
22997 .mr(3)
22998 .nr(4)
22999 .kr(8)
23000 .sr(1)
23001 .m(m)
23002 .n(n)
23003 .k(k)
23004 .iterations(1)
23005 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23006 }
23007 }
23008 }
23009 }
23010
23011 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4) {
23012 TEST_REQUIRES_X86_AVX;
23013 for (uint32_t n = 8; n <= 12; n += 4) {
23014 for (size_t k = 1; k <= 40; k += 9) {
23015 GemmMicrokernelTester()
23016 .mr(3)
23017 .nr(4)
23018 .kr(8)
23019 .sr(1)
23020 .m(3)
23021 .n(4)
23022 .k(k)
23023 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23024 }
23025 }
23026 }
23027
23028 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_strided_cn) {
23029 TEST_REQUIRES_X86_AVX;
23030 for (uint32_t n = 8; n <= 12; n += 4) {
23031 for (size_t k = 1; k <= 40; k += 9) {
23032 GemmMicrokernelTester()
23033 .mr(3)
23034 .nr(4)
23035 .kr(8)
23036 .sr(1)
23037 .m(3)
23038 .n(n)
23039 .k(k)
23040 .cn_stride(7)
23041 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23042 }
23043 }
23044 }
23045
23046 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_subtile) {
23047 TEST_REQUIRES_X86_AVX;
23048 for (uint32_t n = 8; n <= 12; n += 4) {
23049 for (size_t k = 1; k <= 40; k += 9) {
23050 for (uint32_t m = 1; m <= 3; m++) {
23051 GemmMicrokernelTester()
23052 .mr(3)
23053 .nr(4)
23054 .kr(8)
23055 .sr(1)
23056 .m(m)
23057 .n(n)
23058 .k(k)
23059 .iterations(1)
23060 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23061 }
23062 }
23063 }
23064 }
23065
23066 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, small_kernel) {
23067 TEST_REQUIRES_X86_AVX;
23068 for (size_t k = 1; k <= 40; k += 9) {
23069 GemmMicrokernelTester()
23070 .mr(3)
23071 .nr(4)
23072 .kr(8)
23073 .sr(1)
23074 .m(3)
23075 .n(4)
23076 .k(k)
23077 .ks(3)
23078 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23079 }
23080 }
23081
23082 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, small_kernel_subtile) {
23083 TEST_REQUIRES_X86_AVX;
23084 for (size_t k = 1; k <= 40; k += 9) {
23085 for (uint32_t m = 1; m <= 3; m++) {
23086 for (uint32_t n = 1; n <= 4; n++) {
23087 GemmMicrokernelTester()
23088 .mr(3)
23089 .nr(4)
23090 .kr(8)
23091 .sr(1)
23092 .m(m)
23093 .n(n)
23094 .k(k)
23095 .ks(3)
23096 .iterations(1)
23097 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23098 }
23099 }
23100 }
23101 }
23102
23103 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_small_kernel) {
23104 TEST_REQUIRES_X86_AVX;
23105 for (uint32_t n = 5; n < 8; n++) {
23106 for (size_t k = 1; k <= 40; k += 9) {
23107 GemmMicrokernelTester()
23108 .mr(3)
23109 .nr(4)
23110 .kr(8)
23111 .sr(1)
23112 .m(3)
23113 .n(4)
23114 .k(k)
23115 .ks(3)
23116 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23117 }
23118 }
23119 }
23120
23121 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_small_kernel) {
23122 TEST_REQUIRES_X86_AVX;
23123 for (uint32_t n = 8; n <= 12; n += 4) {
23124 for (size_t k = 1; k <= 40; k += 9) {
23125 GemmMicrokernelTester()
23126 .mr(3)
23127 .nr(4)
23128 .kr(8)
23129 .sr(1)
23130 .m(3)
23131 .n(4)
23132 .k(k)
23133 .ks(3)
23134 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23135 }
23136 }
23137 }
23138
23139 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cm_subtile) {
23140 TEST_REQUIRES_X86_AVX;
23141 for (size_t k = 1; k <= 40; k += 9) {
23142 for (uint32_t m = 1; m <= 3; m++) {
23143 for (uint32_t n = 1; n <= 4; n++) {
23144 GemmMicrokernelTester()
23145 .mr(3)
23146 .nr(4)
23147 .kr(8)
23148 .sr(1)
23149 .m(m)
23150 .n(n)
23151 .k(k)
23152 .cm_stride(7)
23153 .iterations(1)
23154 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23155 }
23156 }
23157 }
23158 }
23159
23160 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, a_offset) {
23161 TEST_REQUIRES_X86_AVX;
23162 for (size_t k = 1; k <= 40; k += 9) {
23163 GemmMicrokernelTester()
23164 .mr(3)
23165 .nr(4)
23166 .kr(8)
23167 .sr(1)
23168 .m(3)
23169 .n(4)
23170 .k(k)
23171 .ks(3)
23172 .a_offset(127)
23173 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23174 }
23175 }
23176
23177 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, zero) {
23178 TEST_REQUIRES_X86_AVX;
23179 for (uint32_t mz = 0; mz < 3; mz++) {
23180 for (size_t k = 1; k <= 40; k += 9) {
23181 GemmMicrokernelTester()
23182 .mr(3)
23183 .nr(4)
23184 .kr(8)
23185 .sr(1)
23186 .m(3)
23187 .n(4)
23188 .k(k)
23189 .ks(3)
23190 .a_offset(127)
23191 .zero_index(mz)
23192 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23193 }
23194 }
23195 }
23196
23197 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, qmin) {
23198 TEST_REQUIRES_X86_AVX;
23199 GemmMicrokernelTester()
23200 .mr(3)
23201 .nr(4)
23202 .kr(8)
23203 .sr(1)
23204 .m(3)
23205 .n(4)
23206 .k(8)
23207 .qmin(128)
23208 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23209 }
23210
23211 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, qmax) {
23212 TEST_REQUIRES_X86_AVX;
23213 GemmMicrokernelTester()
23214 .mr(3)
23215 .nr(4)
23216 .kr(8)
23217 .sr(1)
23218 .m(3)
23219 .n(4)
23220 .k(8)
23221 .qmax(128)
23222 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23223 }
23224
23225 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cm) {
23226 TEST_REQUIRES_X86_AVX;
23227 GemmMicrokernelTester()
23228 .mr(3)
23229 .nr(4)
23230 .kr(8)
23231 .sr(1)
23232 .m(3)
23233 .n(4)
23234 .k(8)
23235 .cm_stride(7)
23236 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23237 }
23238
23239 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, no_a_zero_point) {
23240 TEST_REQUIRES_X86_AVX;
23241 for (size_t k = 1; k <= 40; k += 9) {
23242 GemmMicrokernelTester()
23243 .mr(3)
23244 .nr(4)
23245 .kr(8)
23246 .sr(1)
23247 .m(3)
23248 .n(4)
23249 .k(k)
23250 .a_zero_point(0)
23251 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23252 }
23253 }
23254
23255 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, no_b_zero_point) {
23256 TEST_REQUIRES_X86_AVX;
23257 for (size_t k = 1; k <= 40; k += 9) {
23258 GemmMicrokernelTester()
23259 .mr(3)
23260 .nr(4)
23261 .kr(8)
23262 .sr(1)
23263 .m(3)
23264 .n(4)
23265 .k(k)
23266 .b_zero_point(0)
23267 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23268 }
23269 }
23270
23271 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, no_zero_point) {
23272 TEST_REQUIRES_X86_AVX;
23273 for (size_t k = 1; k <= 40; k += 9) {
23274 GemmMicrokernelTester()
23275 .mr(3)
23276 .nr(4)
23277 .kr(8)
23278 .sr(1)
23279 .m(3)
23280 .n(4)
23281 .k(k)
23282 .a_zero_point(0)
23283 .b_zero_point(0)
23284 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23285 }
23286 }
23287#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23288
23289
23290#if XNN_ARCH_X86 || XNN_ARCH_X86_64
23291 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_eq_8) {
23292 TEST_REQUIRES_X86_XOP;
23293 GemmMicrokernelTester()
23294 .mr(1)
23295 .nr(4)
23296 .kr(8)
23297 .sr(1)
23298 .m(1)
23299 .n(4)
23300 .k(8)
23301 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23302 }
23303
23304 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, strided_cn) {
23305 TEST_REQUIRES_X86_XOP;
23306 GemmMicrokernelTester()
23307 .mr(1)
23308 .nr(4)
23309 .kr(8)
23310 .sr(1)
23311 .m(1)
23312 .n(4)
23313 .k(8)
23314 .cn_stride(7)
23315 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23316 }
23317
23318 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_eq_8_subtile) {
23319 TEST_REQUIRES_X86_XOP;
23320 for (uint32_t m = 1; m <= 1; m++) {
23321 for (uint32_t n = 1; n <= 4; n++) {
23322 GemmMicrokernelTester()
23323 .mr(1)
23324 .nr(4)
23325 .kr(8)
23326 .sr(1)
23327 .m(m)
23328 .n(n)
23329 .k(8)
23330 .iterations(1)
23331 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23332 }
23333 }
23334 }
23335
23336 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_eq_8_subtile_m) {
23337 TEST_REQUIRES_X86_XOP;
23338 for (uint32_t m = 1; m <= 1; m++) {
23339 GemmMicrokernelTester()
23340 .mr(1)
23341 .nr(4)
23342 .kr(8)
23343 .sr(1)
23344 .m(m)
23345 .n(4)
23346 .k(8)
23347 .iterations(1)
23348 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23349 }
23350 }
23351
23352 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_eq_8_subtile_n) {
23353 TEST_REQUIRES_X86_XOP;
23354 for (uint32_t n = 1; n <= 4; n++) {
23355 GemmMicrokernelTester()
23356 .mr(1)
23357 .nr(4)
23358 .kr(8)
23359 .sr(1)
23360 .m(1)
23361 .n(n)
23362 .k(8)
23363 .iterations(1)
23364 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23365 }
23366 }
23367
23368 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_lt_8) {
23369 TEST_REQUIRES_X86_XOP;
23370 for (size_t k = 1; k < 8; k++) {
23371 GemmMicrokernelTester()
23372 .mr(1)
23373 .nr(4)
23374 .kr(8)
23375 .sr(1)
23376 .m(1)
23377 .n(4)
23378 .k(k)
23379 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23380 }
23381 }
23382
23383 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_lt_8_subtile) {
23384 TEST_REQUIRES_X86_XOP;
23385 for (size_t k = 1; k < 8; k++) {
23386 for (uint32_t m = 1; m <= 1; m++) {
23387 for (uint32_t n = 1; n <= 4; n++) {
23388 GemmMicrokernelTester()
23389 .mr(1)
23390 .nr(4)
23391 .kr(8)
23392 .sr(1)
23393 .m(m)
23394 .n(n)
23395 .k(k)
23396 .iterations(1)
23397 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23398 }
23399 }
23400 }
23401 }
23402
23403 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_gt_8) {
23404 TEST_REQUIRES_X86_XOP;
23405 for (size_t k = 9; k < 16; k++) {
23406 GemmMicrokernelTester()
23407 .mr(1)
23408 .nr(4)
23409 .kr(8)
23410 .sr(1)
23411 .m(1)
23412 .n(4)
23413 .k(k)
23414 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23415 }
23416 }
23417
23418 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_gt_8_subtile) {
23419 TEST_REQUIRES_X86_XOP;
23420 for (size_t k = 9; k < 16; k++) {
23421 for (uint32_t m = 1; m <= 1; m++) {
23422 for (uint32_t n = 1; n <= 4; n++) {
23423 GemmMicrokernelTester()
23424 .mr(1)
23425 .nr(4)
23426 .kr(8)
23427 .sr(1)
23428 .m(m)
23429 .n(n)
23430 .k(k)
23431 .iterations(1)
23432 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23433 }
23434 }
23435 }
23436 }
23437
23438 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_div_8) {
23439 TEST_REQUIRES_X86_XOP;
23440 for (size_t k = 16; k <= 80; k += 8) {
23441 GemmMicrokernelTester()
23442 .mr(1)
23443 .nr(4)
23444 .kr(8)
23445 .sr(1)
23446 .m(1)
23447 .n(4)
23448 .k(k)
23449 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23450 }
23451 }
23452
23453 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_div_8_subtile) {
23454 TEST_REQUIRES_X86_XOP;
23455 for (size_t k = 16; k <= 80; k += 8) {
23456 for (uint32_t m = 1; m <= 1; m++) {
23457 for (uint32_t n = 1; n <= 4; n++) {
23458 GemmMicrokernelTester()
23459 .mr(1)
23460 .nr(4)
23461 .kr(8)
23462 .sr(1)
23463 .m(m)
23464 .n(n)
23465 .k(k)
23466 .iterations(1)
23467 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23468 }
23469 }
23470 }
23471 }
23472
23473 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_gt_4) {
23474 TEST_REQUIRES_X86_XOP;
23475 for (uint32_t n = 5; n < 8; n++) {
23476 for (size_t k = 1; k <= 40; k += 9) {
23477 GemmMicrokernelTester()
23478 .mr(1)
23479 .nr(4)
23480 .kr(8)
23481 .sr(1)
23482 .m(1)
23483 .n(4)
23484 .k(k)
23485 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23486 }
23487 }
23488 }
23489
23490 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_gt_4_strided_cn) {
23491 TEST_REQUIRES_X86_XOP;
23492 for (uint32_t n = 5; n < 8; n++) {
23493 for (size_t k = 1; k <= 40; k += 9) {
23494 GemmMicrokernelTester()
23495 .mr(1)
23496 .nr(4)
23497 .kr(8)
23498 .sr(1)
23499 .m(1)
23500 .n(4)
23501 .k(k)
23502 .cn_stride(7)
23503 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23504 }
23505 }
23506 }
23507
23508 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_gt_4_subtile) {
23509 TEST_REQUIRES_X86_XOP;
23510 for (uint32_t n = 5; n < 8; n++) {
23511 for (size_t k = 1; k <= 40; k += 9) {
23512 for (uint32_t m = 1; m <= 1; m++) {
23513 GemmMicrokernelTester()
23514 .mr(1)
23515 .nr(4)
23516 .kr(8)
23517 .sr(1)
23518 .m(m)
23519 .n(n)
23520 .k(k)
23521 .iterations(1)
23522 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23523 }
23524 }
23525 }
23526 }
23527
23528 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_div_4) {
23529 TEST_REQUIRES_X86_XOP;
23530 for (uint32_t n = 8; n <= 12; n += 4) {
23531 for (size_t k = 1; k <= 40; k += 9) {
23532 GemmMicrokernelTester()
23533 .mr(1)
23534 .nr(4)
23535 .kr(8)
23536 .sr(1)
23537 .m(1)
23538 .n(4)
23539 .k(k)
23540 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23541 }
23542 }
23543 }
23544
23545 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_div_4_strided_cn) {
23546 TEST_REQUIRES_X86_XOP;
23547 for (uint32_t n = 8; n <= 12; n += 4) {
23548 for (size_t k = 1; k <= 40; k += 9) {
23549 GemmMicrokernelTester()
23550 .mr(1)
23551 .nr(4)
23552 .kr(8)
23553 .sr(1)
23554 .m(1)
23555 .n(n)
23556 .k(k)
23557 .cn_stride(7)
23558 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23559 }
23560 }
23561 }
23562
23563 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_div_4_subtile) {
23564 TEST_REQUIRES_X86_XOP;
23565 for (uint32_t n = 8; n <= 12; n += 4) {
23566 for (size_t k = 1; k <= 40; k += 9) {
23567 for (uint32_t m = 1; m <= 1; m++) {
23568 GemmMicrokernelTester()
23569 .mr(1)
23570 .nr(4)
23571 .kr(8)
23572 .sr(1)
23573 .m(m)
23574 .n(n)
23575 .k(k)
23576 .iterations(1)
23577 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23578 }
23579 }
23580 }
23581 }
23582
23583 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, small_kernel) {
23584 TEST_REQUIRES_X86_XOP;
23585 for (size_t k = 1; k <= 40; k += 9) {
23586 GemmMicrokernelTester()
23587 .mr(1)
23588 .nr(4)
23589 .kr(8)
23590 .sr(1)
23591 .m(1)
23592 .n(4)
23593 .k(k)
23594 .ks(3)
23595 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23596 }
23597 }
23598
23599 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, small_kernel_subtile) {
23600 TEST_REQUIRES_X86_XOP;
23601 for (size_t k = 1; k <= 40; k += 9) {
23602 for (uint32_t m = 1; m <= 1; m++) {
23603 for (uint32_t n = 1; n <= 4; n++) {
23604 GemmMicrokernelTester()
23605 .mr(1)
23606 .nr(4)
23607 .kr(8)
23608 .sr(1)
23609 .m(m)
23610 .n(n)
23611 .k(k)
23612 .ks(3)
23613 .iterations(1)
23614 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23615 }
23616 }
23617 }
23618 }
23619
23620 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_gt_4_small_kernel) {
23621 TEST_REQUIRES_X86_XOP;
23622 for (uint32_t n = 5; n < 8; n++) {
23623 for (size_t k = 1; k <= 40; k += 9) {
23624 GemmMicrokernelTester()
23625 .mr(1)
23626 .nr(4)
23627 .kr(8)
23628 .sr(1)
23629 .m(1)
23630 .n(4)
23631 .k(k)
23632 .ks(3)
23633 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23634 }
23635 }
23636 }
23637
23638 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_div_4_small_kernel) {
23639 TEST_REQUIRES_X86_XOP;
23640 for (uint32_t n = 8; n <= 12; n += 4) {
23641 for (size_t k = 1; k <= 40; k += 9) {
23642 GemmMicrokernelTester()
23643 .mr(1)
23644 .nr(4)
23645 .kr(8)
23646 .sr(1)
23647 .m(1)
23648 .n(4)
23649 .k(k)
23650 .ks(3)
23651 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23652 }
23653 }
23654 }
23655
23656 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, strided_cm_subtile) {
23657 TEST_REQUIRES_X86_XOP;
23658 for (size_t k = 1; k <= 40; k += 9) {
23659 for (uint32_t m = 1; m <= 1; m++) {
23660 for (uint32_t n = 1; n <= 4; n++) {
23661 GemmMicrokernelTester()
23662 .mr(1)
23663 .nr(4)
23664 .kr(8)
23665 .sr(1)
23666 .m(m)
23667 .n(n)
23668 .k(k)
23669 .cm_stride(7)
23670 .iterations(1)
23671 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23672 }
23673 }
23674 }
23675 }
23676
23677 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, a_offset) {
23678 TEST_REQUIRES_X86_XOP;
23679 for (size_t k = 1; k <= 40; k += 9) {
23680 GemmMicrokernelTester()
23681 .mr(1)
23682 .nr(4)
23683 .kr(8)
23684 .sr(1)
23685 .m(1)
23686 .n(4)
23687 .k(k)
23688 .ks(3)
23689 .a_offset(43)
23690 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23691 }
23692 }
23693
23694 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, zero) {
23695 TEST_REQUIRES_X86_XOP;
23696 for (uint32_t mz = 0; mz < 1; mz++) {
23697 for (size_t k = 1; k <= 40; k += 9) {
23698 GemmMicrokernelTester()
23699 .mr(1)
23700 .nr(4)
23701 .kr(8)
23702 .sr(1)
23703 .m(1)
23704 .n(4)
23705 .k(k)
23706 .ks(3)
23707 .a_offset(43)
23708 .zero_index(mz)
23709 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23710 }
23711 }
23712 }
23713
23714 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, qmin) {
23715 TEST_REQUIRES_X86_XOP;
23716 GemmMicrokernelTester()
23717 .mr(1)
23718 .nr(4)
23719 .kr(8)
23720 .sr(1)
23721 .m(1)
23722 .n(4)
23723 .k(8)
23724 .qmin(128)
23725 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23726 }
23727
23728 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, qmax) {
23729 TEST_REQUIRES_X86_XOP;
23730 GemmMicrokernelTester()
23731 .mr(1)
23732 .nr(4)
23733 .kr(8)
23734 .sr(1)
23735 .m(1)
23736 .n(4)
23737 .k(8)
23738 .qmax(128)
23739 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23740 }
23741
23742 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, strided_cm) {
23743 TEST_REQUIRES_X86_XOP;
23744 GemmMicrokernelTester()
23745 .mr(1)
23746 .nr(4)
23747 .kr(8)
23748 .sr(1)
23749 .m(1)
23750 .n(4)
23751 .k(8)
23752 .cm_stride(7)
23753 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23754 }
23755
23756 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, no_a_zero_point) {
23757 TEST_REQUIRES_X86_XOP;
23758 for (size_t k = 1; k <= 40; k += 9) {
23759 GemmMicrokernelTester()
23760 .mr(1)
23761 .nr(4)
23762 .kr(8)
23763 .sr(1)
23764 .m(1)
23765 .n(4)
23766 .k(k)
23767 .a_zero_point(0)
23768 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23769 }
23770 }
23771
23772 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, no_b_zero_point) {
23773 TEST_REQUIRES_X86_XOP;
23774 for (size_t k = 1; k <= 40; k += 9) {
23775 GemmMicrokernelTester()
23776 .mr(1)
23777 .nr(4)
23778 .kr(8)
23779 .sr(1)
23780 .m(1)
23781 .n(4)
23782 .k(k)
23783 .b_zero_point(0)
23784 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23785 }
23786 }
23787
23788 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, no_zero_point) {
23789 TEST_REQUIRES_X86_XOP;
23790 for (size_t k = 1; k <= 40; k += 9) {
23791 GemmMicrokernelTester()
23792 .mr(1)
23793 .nr(4)
23794 .kr(8)
23795 .sr(1)
23796 .m(1)
23797 .n(4)
23798 .k(k)
23799 .a_zero_point(0)
23800 .b_zero_point(0)
23801 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23802 }
23803 }
23804#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23805
23806
23807#if XNN_ARCH_X86 || XNN_ARCH_X86_64
23808 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8) {
23809 TEST_REQUIRES_X86_XOP;
23810 GemmMicrokernelTester()
23811 .mr(2)
23812 .nr(4)
23813 .kr(8)
23814 .sr(1)
23815 .m(2)
23816 .n(4)
23817 .k(8)
23818 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23819 }
23820
23821 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cn) {
23822 TEST_REQUIRES_X86_XOP;
23823 GemmMicrokernelTester()
23824 .mr(2)
23825 .nr(4)
23826 .kr(8)
23827 .sr(1)
23828 .m(2)
23829 .n(4)
23830 .k(8)
23831 .cn_stride(7)
23832 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23833 }
23834
23835 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile) {
23836 TEST_REQUIRES_X86_XOP;
23837 for (uint32_t m = 1; m <= 2; m++) {
23838 for (uint32_t n = 1; n <= 4; n++) {
23839 GemmMicrokernelTester()
23840 .mr(2)
23841 .nr(4)
23842 .kr(8)
23843 .sr(1)
23844 .m(m)
23845 .n(n)
23846 .k(8)
23847 .iterations(1)
23848 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23849 }
23850 }
23851 }
23852
23853 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile_m) {
23854 TEST_REQUIRES_X86_XOP;
23855 for (uint32_t m = 1; m <= 2; m++) {
23856 GemmMicrokernelTester()
23857 .mr(2)
23858 .nr(4)
23859 .kr(8)
23860 .sr(1)
23861 .m(m)
23862 .n(4)
23863 .k(8)
23864 .iterations(1)
23865 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23866 }
23867 }
23868
23869 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile_n) {
23870 TEST_REQUIRES_X86_XOP;
23871 for (uint32_t n = 1; n <= 4; n++) {
23872 GemmMicrokernelTester()
23873 .mr(2)
23874 .nr(4)
23875 .kr(8)
23876 .sr(1)
23877 .m(2)
23878 .n(n)
23879 .k(8)
23880 .iterations(1)
23881 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23882 }
23883 }
23884
23885 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8) {
23886 TEST_REQUIRES_X86_XOP;
23887 for (size_t k = 1; k < 8; k++) {
23888 GemmMicrokernelTester()
23889 .mr(2)
23890 .nr(4)
23891 .kr(8)
23892 .sr(1)
23893 .m(2)
23894 .n(4)
23895 .k(k)
23896 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23897 }
23898 }
23899
23900 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8_subtile) {
23901 TEST_REQUIRES_X86_XOP;
23902 for (size_t k = 1; k < 8; k++) {
23903 for (uint32_t m = 1; m <= 2; m++) {
23904 for (uint32_t n = 1; n <= 4; n++) {
23905 GemmMicrokernelTester()
23906 .mr(2)
23907 .nr(4)
23908 .kr(8)
23909 .sr(1)
23910 .m(m)
23911 .n(n)
23912 .k(k)
23913 .iterations(1)
23914 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23915 }
23916 }
23917 }
23918 }
23919
23920 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8) {
23921 TEST_REQUIRES_X86_XOP;
23922 for (size_t k = 9; k < 16; k++) {
23923 GemmMicrokernelTester()
23924 .mr(2)
23925 .nr(4)
23926 .kr(8)
23927 .sr(1)
23928 .m(2)
23929 .n(4)
23930 .k(k)
23931 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23932 }
23933 }
23934
23935 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8_subtile) {
23936 TEST_REQUIRES_X86_XOP;
23937 for (size_t k = 9; k < 16; k++) {
23938 for (uint32_t m = 1; m <= 2; m++) {
23939 for (uint32_t n = 1; n <= 4; n++) {
23940 GemmMicrokernelTester()
23941 .mr(2)
23942 .nr(4)
23943 .kr(8)
23944 .sr(1)
23945 .m(m)
23946 .n(n)
23947 .k(k)
23948 .iterations(1)
23949 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23950 }
23951 }
23952 }
23953 }
23954
23955 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8) {
23956 TEST_REQUIRES_X86_XOP;
23957 for (size_t k = 16; k <= 80; k += 8) {
23958 GemmMicrokernelTester()
23959 .mr(2)
23960 .nr(4)
23961 .kr(8)
23962 .sr(1)
23963 .m(2)
23964 .n(4)
23965 .k(k)
23966 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23967 }
23968 }
23969
23970 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8_subtile) {
23971 TEST_REQUIRES_X86_XOP;
23972 for (size_t k = 16; k <= 80; k += 8) {
23973 for (uint32_t m = 1; m <= 2; m++) {
23974 for (uint32_t n = 1; n <= 4; n++) {
23975 GemmMicrokernelTester()
23976 .mr(2)
23977 .nr(4)
23978 .kr(8)
23979 .sr(1)
23980 .m(m)
23981 .n(n)
23982 .k(k)
23983 .iterations(1)
23984 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
23985 }
23986 }
23987 }
23988 }
23989
23990 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4) {
23991 TEST_REQUIRES_X86_XOP;
23992 for (uint32_t n = 5; n < 8; n++) {
23993 for (size_t k = 1; k <= 40; k += 9) {
23994 GemmMicrokernelTester()
23995 .mr(2)
23996 .nr(4)
23997 .kr(8)
23998 .sr(1)
23999 .m(2)
24000 .n(4)
24001 .k(k)
24002 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24003 }
24004 }
24005 }
24006
24007 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_strided_cn) {
24008 TEST_REQUIRES_X86_XOP;
24009 for (uint32_t n = 5; n < 8; n++) {
24010 for (size_t k = 1; k <= 40; k += 9) {
24011 GemmMicrokernelTester()
24012 .mr(2)
24013 .nr(4)
24014 .kr(8)
24015 .sr(1)
24016 .m(2)
24017 .n(4)
24018 .k(k)
24019 .cn_stride(7)
24020 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24021 }
24022 }
24023 }
24024
24025 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_subtile) {
24026 TEST_REQUIRES_X86_XOP;
24027 for (uint32_t n = 5; n < 8; n++) {
24028 for (size_t k = 1; k <= 40; k += 9) {
24029 for (uint32_t m = 1; m <= 2; m++) {
24030 GemmMicrokernelTester()
24031 .mr(2)
24032 .nr(4)
24033 .kr(8)
24034 .sr(1)
24035 .m(m)
24036 .n(n)
24037 .k(k)
24038 .iterations(1)
24039 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24040 }
24041 }
24042 }
24043 }
24044
24045 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4) {
24046 TEST_REQUIRES_X86_XOP;
24047 for (uint32_t n = 8; n <= 12; n += 4) {
24048 for (size_t k = 1; k <= 40; k += 9) {
24049 GemmMicrokernelTester()
24050 .mr(2)
24051 .nr(4)
24052 .kr(8)
24053 .sr(1)
24054 .m(2)
24055 .n(4)
24056 .k(k)
24057 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24058 }
24059 }
24060 }
24061
24062 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_strided_cn) {
24063 TEST_REQUIRES_X86_XOP;
24064 for (uint32_t n = 8; n <= 12; n += 4) {
24065 for (size_t k = 1; k <= 40; k += 9) {
24066 GemmMicrokernelTester()
24067 .mr(2)
24068 .nr(4)
24069 .kr(8)
24070 .sr(1)
24071 .m(2)
24072 .n(n)
24073 .k(k)
24074 .cn_stride(7)
24075 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24076 }
24077 }
24078 }
24079
24080 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_subtile) {
24081 TEST_REQUIRES_X86_XOP;
24082 for (uint32_t n = 8; n <= 12; n += 4) {
24083 for (size_t k = 1; k <= 40; k += 9) {
24084 for (uint32_t m = 1; m <= 2; m++) {
24085 GemmMicrokernelTester()
24086 .mr(2)
24087 .nr(4)
24088 .kr(8)
24089 .sr(1)
24090 .m(m)
24091 .n(n)
24092 .k(k)
24093 .iterations(1)
24094 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24095 }
24096 }
24097 }
24098 }
24099
24100 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, small_kernel) {
24101 TEST_REQUIRES_X86_XOP;
24102 for (size_t k = 1; k <= 40; k += 9) {
24103 GemmMicrokernelTester()
24104 .mr(2)
24105 .nr(4)
24106 .kr(8)
24107 .sr(1)
24108 .m(2)
24109 .n(4)
24110 .k(k)
24111 .ks(3)
24112 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24113 }
24114 }
24115
24116 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, small_kernel_subtile) {
24117 TEST_REQUIRES_X86_XOP;
24118 for (size_t k = 1; k <= 40; k += 9) {
24119 for (uint32_t m = 1; m <= 2; m++) {
24120 for (uint32_t n = 1; n <= 4; n++) {
24121 GemmMicrokernelTester()
24122 .mr(2)
24123 .nr(4)
24124 .kr(8)
24125 .sr(1)
24126 .m(m)
24127 .n(n)
24128 .k(k)
24129 .ks(3)
24130 .iterations(1)
24131 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24132 }
24133 }
24134 }
24135 }
24136
24137 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_small_kernel) {
24138 TEST_REQUIRES_X86_XOP;
24139 for (uint32_t n = 5; n < 8; n++) {
24140 for (size_t k = 1; k <= 40; k += 9) {
24141 GemmMicrokernelTester()
24142 .mr(2)
24143 .nr(4)
24144 .kr(8)
24145 .sr(1)
24146 .m(2)
24147 .n(4)
24148 .k(k)
24149 .ks(3)
24150 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24151 }
24152 }
24153 }
24154
24155 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_small_kernel) {
24156 TEST_REQUIRES_X86_XOP;
24157 for (uint32_t n = 8; n <= 12; n += 4) {
24158 for (size_t k = 1; k <= 40; k += 9) {
24159 GemmMicrokernelTester()
24160 .mr(2)
24161 .nr(4)
24162 .kr(8)
24163 .sr(1)
24164 .m(2)
24165 .n(4)
24166 .k(k)
24167 .ks(3)
24168 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24169 }
24170 }
24171 }
24172
24173 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cm_subtile) {
24174 TEST_REQUIRES_X86_XOP;
24175 for (size_t k = 1; k <= 40; k += 9) {
24176 for (uint32_t m = 1; m <= 2; m++) {
24177 for (uint32_t n = 1; n <= 4; n++) {
24178 GemmMicrokernelTester()
24179 .mr(2)
24180 .nr(4)
24181 .kr(8)
24182 .sr(1)
24183 .m(m)
24184 .n(n)
24185 .k(k)
24186 .cm_stride(7)
24187 .iterations(1)
24188 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24189 }
24190 }
24191 }
24192 }
24193
24194 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, a_offset) {
24195 TEST_REQUIRES_X86_XOP;
24196 for (size_t k = 1; k <= 40; k += 9) {
24197 GemmMicrokernelTester()
24198 .mr(2)
24199 .nr(4)
24200 .kr(8)
24201 .sr(1)
24202 .m(2)
24203 .n(4)
24204 .k(k)
24205 .ks(3)
24206 .a_offset(83)
24207 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24208 }
24209 }
24210
24211 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, zero) {
24212 TEST_REQUIRES_X86_XOP;
24213 for (uint32_t mz = 0; mz < 2; mz++) {
24214 for (size_t k = 1; k <= 40; k += 9) {
24215 GemmMicrokernelTester()
24216 .mr(2)
24217 .nr(4)
24218 .kr(8)
24219 .sr(1)
24220 .m(2)
24221 .n(4)
24222 .k(k)
24223 .ks(3)
24224 .a_offset(83)
24225 .zero_index(mz)
24226 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24227 }
24228 }
24229 }
24230
24231 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, qmin) {
24232 TEST_REQUIRES_X86_XOP;
24233 GemmMicrokernelTester()
24234 .mr(2)
24235 .nr(4)
24236 .kr(8)
24237 .sr(1)
24238 .m(2)
24239 .n(4)
24240 .k(8)
24241 .qmin(128)
24242 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24243 }
24244
24245 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, qmax) {
24246 TEST_REQUIRES_X86_XOP;
24247 GemmMicrokernelTester()
24248 .mr(2)
24249 .nr(4)
24250 .kr(8)
24251 .sr(1)
24252 .m(2)
24253 .n(4)
24254 .k(8)
24255 .qmax(128)
24256 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24257 }
24258
24259 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cm) {
24260 TEST_REQUIRES_X86_XOP;
24261 GemmMicrokernelTester()
24262 .mr(2)
24263 .nr(4)
24264 .kr(8)
24265 .sr(1)
24266 .m(2)
24267 .n(4)
24268 .k(8)
24269 .cm_stride(7)
24270 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24271 }
24272
24273 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, no_a_zero_point) {
24274 TEST_REQUIRES_X86_XOP;
24275 for (size_t k = 1; k <= 40; k += 9) {
24276 GemmMicrokernelTester()
24277 .mr(2)
24278 .nr(4)
24279 .kr(8)
24280 .sr(1)
24281 .m(2)
24282 .n(4)
24283 .k(k)
24284 .a_zero_point(0)
24285 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24286 }
24287 }
24288
24289 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, no_b_zero_point) {
24290 TEST_REQUIRES_X86_XOP;
24291 for (size_t k = 1; k <= 40; k += 9) {
24292 GemmMicrokernelTester()
24293 .mr(2)
24294 .nr(4)
24295 .kr(8)
24296 .sr(1)
24297 .m(2)
24298 .n(4)
24299 .k(k)
24300 .b_zero_point(0)
24301 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24302 }
24303 }
24304
24305 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, no_zero_point) {
24306 TEST_REQUIRES_X86_XOP;
24307 for (size_t k = 1; k <= 40; k += 9) {
24308 GemmMicrokernelTester()
24309 .mr(2)
24310 .nr(4)
24311 .kr(8)
24312 .sr(1)
24313 .m(2)
24314 .n(4)
24315 .k(k)
24316 .a_zero_point(0)
24317 .b_zero_point(0)
24318 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24319 }
24320 }
24321#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24322
24323
24324#if XNN_ARCH_X86 || XNN_ARCH_X86_64
24325 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8) {
24326 TEST_REQUIRES_X86_XOP;
24327 GemmMicrokernelTester()
24328 .mr(3)
24329 .nr(4)
24330 .kr(8)
24331 .sr(1)
24332 .m(3)
24333 .n(4)
24334 .k(8)
24335 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24336 }
24337
24338 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cn) {
24339 TEST_REQUIRES_X86_XOP;
24340 GemmMicrokernelTester()
24341 .mr(3)
24342 .nr(4)
24343 .kr(8)
24344 .sr(1)
24345 .m(3)
24346 .n(4)
24347 .k(8)
24348 .cn_stride(7)
24349 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24350 }
24351
24352 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile) {
24353 TEST_REQUIRES_X86_XOP;
24354 for (uint32_t m = 1; m <= 3; m++) {
24355 for (uint32_t n = 1; n <= 4; n++) {
24356 GemmMicrokernelTester()
24357 .mr(3)
24358 .nr(4)
24359 .kr(8)
24360 .sr(1)
24361 .m(m)
24362 .n(n)
24363 .k(8)
24364 .iterations(1)
24365 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24366 }
24367 }
24368 }
24369
24370 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile_m) {
24371 TEST_REQUIRES_X86_XOP;
24372 for (uint32_t m = 1; m <= 3; m++) {
24373 GemmMicrokernelTester()
24374 .mr(3)
24375 .nr(4)
24376 .kr(8)
24377 .sr(1)
24378 .m(m)
24379 .n(4)
24380 .k(8)
24381 .iterations(1)
24382 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24383 }
24384 }
24385
24386 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile_n) {
24387 TEST_REQUIRES_X86_XOP;
24388 for (uint32_t n = 1; n <= 4; n++) {
24389 GemmMicrokernelTester()
24390 .mr(3)
24391 .nr(4)
24392 .kr(8)
24393 .sr(1)
24394 .m(3)
24395 .n(n)
24396 .k(8)
24397 .iterations(1)
24398 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24399 }
24400 }
24401
24402 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8) {
24403 TEST_REQUIRES_X86_XOP;
24404 for (size_t k = 1; k < 8; k++) {
24405 GemmMicrokernelTester()
24406 .mr(3)
24407 .nr(4)
24408 .kr(8)
24409 .sr(1)
24410 .m(3)
24411 .n(4)
24412 .k(k)
24413 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24414 }
24415 }
24416
24417 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8_subtile) {
24418 TEST_REQUIRES_X86_XOP;
24419 for (size_t k = 1; k < 8; k++) {
24420 for (uint32_t m = 1; m <= 3; m++) {
24421 for (uint32_t n = 1; n <= 4; n++) {
24422 GemmMicrokernelTester()
24423 .mr(3)
24424 .nr(4)
24425 .kr(8)
24426 .sr(1)
24427 .m(m)
24428 .n(n)
24429 .k(k)
24430 .iterations(1)
24431 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24432 }
24433 }
24434 }
24435 }
24436
24437 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8) {
24438 TEST_REQUIRES_X86_XOP;
24439 for (size_t k = 9; k < 16; k++) {
24440 GemmMicrokernelTester()
24441 .mr(3)
24442 .nr(4)
24443 .kr(8)
24444 .sr(1)
24445 .m(3)
24446 .n(4)
24447 .k(k)
24448 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24449 }
24450 }
24451
24452 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8_subtile) {
24453 TEST_REQUIRES_X86_XOP;
24454 for (size_t k = 9; k < 16; k++) {
24455 for (uint32_t m = 1; m <= 3; m++) {
24456 for (uint32_t n = 1; n <= 4; n++) {
24457 GemmMicrokernelTester()
24458 .mr(3)
24459 .nr(4)
24460 .kr(8)
24461 .sr(1)
24462 .m(m)
24463 .n(n)
24464 .k(k)
24465 .iterations(1)
24466 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24467 }
24468 }
24469 }
24470 }
24471
24472 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8) {
24473 TEST_REQUIRES_X86_XOP;
24474 for (size_t k = 16; k <= 80; k += 8) {
24475 GemmMicrokernelTester()
24476 .mr(3)
24477 .nr(4)
24478 .kr(8)
24479 .sr(1)
24480 .m(3)
24481 .n(4)
24482 .k(k)
24483 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24484 }
24485 }
24486
24487 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8_subtile) {
24488 TEST_REQUIRES_X86_XOP;
24489 for (size_t k = 16; k <= 80; k += 8) {
24490 for (uint32_t m = 1; m <= 3; m++) {
24491 for (uint32_t n = 1; n <= 4; n++) {
24492 GemmMicrokernelTester()
24493 .mr(3)
24494 .nr(4)
24495 .kr(8)
24496 .sr(1)
24497 .m(m)
24498 .n(n)
24499 .k(k)
24500 .iterations(1)
24501 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24502 }
24503 }
24504 }
24505 }
24506
24507 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4) {
24508 TEST_REQUIRES_X86_XOP;
24509 for (uint32_t n = 5; n < 8; n++) {
24510 for (size_t k = 1; k <= 40; k += 9) {
24511 GemmMicrokernelTester()
24512 .mr(3)
24513 .nr(4)
24514 .kr(8)
24515 .sr(1)
24516 .m(3)
24517 .n(4)
24518 .k(k)
24519 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24520 }
24521 }
24522 }
24523
24524 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_strided_cn) {
24525 TEST_REQUIRES_X86_XOP;
24526 for (uint32_t n = 5; n < 8; n++) {
24527 for (size_t k = 1; k <= 40; k += 9) {
24528 GemmMicrokernelTester()
24529 .mr(3)
24530 .nr(4)
24531 .kr(8)
24532 .sr(1)
24533 .m(3)
24534 .n(4)
24535 .k(k)
24536 .cn_stride(7)
24537 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24538 }
24539 }
24540 }
24541
24542 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_subtile) {
24543 TEST_REQUIRES_X86_XOP;
24544 for (uint32_t n = 5; n < 8; n++) {
24545 for (size_t k = 1; k <= 40; k += 9) {
24546 for (uint32_t m = 1; m <= 3; m++) {
24547 GemmMicrokernelTester()
24548 .mr(3)
24549 .nr(4)
24550 .kr(8)
24551 .sr(1)
24552 .m(m)
24553 .n(n)
24554 .k(k)
24555 .iterations(1)
24556 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24557 }
24558 }
24559 }
24560 }
24561
24562 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4) {
24563 TEST_REQUIRES_X86_XOP;
24564 for (uint32_t n = 8; n <= 12; n += 4) {
24565 for (size_t k = 1; k <= 40; k += 9) {
24566 GemmMicrokernelTester()
24567 .mr(3)
24568 .nr(4)
24569 .kr(8)
24570 .sr(1)
24571 .m(3)
24572 .n(4)
24573 .k(k)
24574 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24575 }
24576 }
24577 }
24578
24579 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_strided_cn) {
24580 TEST_REQUIRES_X86_XOP;
24581 for (uint32_t n = 8; n <= 12; n += 4) {
24582 for (size_t k = 1; k <= 40; k += 9) {
24583 GemmMicrokernelTester()
24584 .mr(3)
24585 .nr(4)
24586 .kr(8)
24587 .sr(1)
24588 .m(3)
24589 .n(n)
24590 .k(k)
24591 .cn_stride(7)
24592 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24593 }
24594 }
24595 }
24596
24597 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_subtile) {
24598 TEST_REQUIRES_X86_XOP;
24599 for (uint32_t n = 8; n <= 12; n += 4) {
24600 for (size_t k = 1; k <= 40; k += 9) {
24601 for (uint32_t m = 1; m <= 3; m++) {
24602 GemmMicrokernelTester()
24603 .mr(3)
24604 .nr(4)
24605 .kr(8)
24606 .sr(1)
24607 .m(m)
24608 .n(n)
24609 .k(k)
24610 .iterations(1)
24611 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24612 }
24613 }
24614 }
24615 }
24616
24617 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, small_kernel) {
24618 TEST_REQUIRES_X86_XOP;
24619 for (size_t k = 1; k <= 40; k += 9) {
24620 GemmMicrokernelTester()
24621 .mr(3)
24622 .nr(4)
24623 .kr(8)
24624 .sr(1)
24625 .m(3)
24626 .n(4)
24627 .k(k)
24628 .ks(3)
24629 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24630 }
24631 }
24632
24633 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, small_kernel_subtile) {
24634 TEST_REQUIRES_X86_XOP;
24635 for (size_t k = 1; k <= 40; k += 9) {
24636 for (uint32_t m = 1; m <= 3; m++) {
24637 for (uint32_t n = 1; n <= 4; n++) {
24638 GemmMicrokernelTester()
24639 .mr(3)
24640 .nr(4)
24641 .kr(8)
24642 .sr(1)
24643 .m(m)
24644 .n(n)
24645 .k(k)
24646 .ks(3)
24647 .iterations(1)
24648 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24649 }
24650 }
24651 }
24652 }
24653
24654 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_small_kernel) {
24655 TEST_REQUIRES_X86_XOP;
24656 for (uint32_t n = 5; n < 8; n++) {
24657 for (size_t k = 1; k <= 40; k += 9) {
24658 GemmMicrokernelTester()
24659 .mr(3)
24660 .nr(4)
24661 .kr(8)
24662 .sr(1)
24663 .m(3)
24664 .n(4)
24665 .k(k)
24666 .ks(3)
24667 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24668 }
24669 }
24670 }
24671
24672 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_small_kernel) {
24673 TEST_REQUIRES_X86_XOP;
24674 for (uint32_t n = 8; n <= 12; n += 4) {
24675 for (size_t k = 1; k <= 40; k += 9) {
24676 GemmMicrokernelTester()
24677 .mr(3)
24678 .nr(4)
24679 .kr(8)
24680 .sr(1)
24681 .m(3)
24682 .n(4)
24683 .k(k)
24684 .ks(3)
24685 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24686 }
24687 }
24688 }
24689
24690 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cm_subtile) {
24691 TEST_REQUIRES_X86_XOP;
24692 for (size_t k = 1; k <= 40; k += 9) {
24693 for (uint32_t m = 1; m <= 3; m++) {
24694 for (uint32_t n = 1; n <= 4; n++) {
24695 GemmMicrokernelTester()
24696 .mr(3)
24697 .nr(4)
24698 .kr(8)
24699 .sr(1)
24700 .m(m)
24701 .n(n)
24702 .k(k)
24703 .cm_stride(7)
24704 .iterations(1)
24705 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24706 }
24707 }
24708 }
24709 }
24710
24711 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, a_offset) {
24712 TEST_REQUIRES_X86_XOP;
24713 for (size_t k = 1; k <= 40; k += 9) {
24714 GemmMicrokernelTester()
24715 .mr(3)
24716 .nr(4)
24717 .kr(8)
24718 .sr(1)
24719 .m(3)
24720 .n(4)
24721 .k(k)
24722 .ks(3)
24723 .a_offset(127)
24724 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24725 }
24726 }
24727
24728 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, zero) {
24729 TEST_REQUIRES_X86_XOP;
24730 for (uint32_t mz = 0; mz < 3; mz++) {
24731 for (size_t k = 1; k <= 40; k += 9) {
24732 GemmMicrokernelTester()
24733 .mr(3)
24734 .nr(4)
24735 .kr(8)
24736 .sr(1)
24737 .m(3)
24738 .n(4)
24739 .k(k)
24740 .ks(3)
24741 .a_offset(127)
24742 .zero_index(mz)
24743 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24744 }
24745 }
24746 }
24747
24748 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, qmin) {
24749 TEST_REQUIRES_X86_XOP;
24750 GemmMicrokernelTester()
24751 .mr(3)
24752 .nr(4)
24753 .kr(8)
24754 .sr(1)
24755 .m(3)
24756 .n(4)
24757 .k(8)
24758 .qmin(128)
24759 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24760 }
24761
24762 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, qmax) {
24763 TEST_REQUIRES_X86_XOP;
24764 GemmMicrokernelTester()
24765 .mr(3)
24766 .nr(4)
24767 .kr(8)
24768 .sr(1)
24769 .m(3)
24770 .n(4)
24771 .k(8)
24772 .qmax(128)
24773 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24774 }
24775
24776 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cm) {
24777 TEST_REQUIRES_X86_XOP;
24778 GemmMicrokernelTester()
24779 .mr(3)
24780 .nr(4)
24781 .kr(8)
24782 .sr(1)
24783 .m(3)
24784 .n(4)
24785 .k(8)
24786 .cm_stride(7)
24787 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24788 }
24789
24790 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, no_a_zero_point) {
24791 TEST_REQUIRES_X86_XOP;
24792 for (size_t k = 1; k <= 40; k += 9) {
24793 GemmMicrokernelTester()
24794 .mr(3)
24795 .nr(4)
24796 .kr(8)
24797 .sr(1)
24798 .m(3)
24799 .n(4)
24800 .k(k)
24801 .a_zero_point(0)
24802 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24803 }
24804 }
24805
24806 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, no_b_zero_point) {
24807 TEST_REQUIRES_X86_XOP;
24808 for (size_t k = 1; k <= 40; k += 9) {
24809 GemmMicrokernelTester()
24810 .mr(3)
24811 .nr(4)
24812 .kr(8)
24813 .sr(1)
24814 .m(3)
24815 .n(4)
24816 .k(k)
24817 .b_zero_point(0)
24818 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24819 }
24820 }
24821
24822 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, no_zero_point) {
24823 TEST_REQUIRES_X86_XOP;
24824 for (size_t k = 1; k <= 40; k += 9) {
24825 GemmMicrokernelTester()
24826 .mr(3)
24827 .nr(4)
24828 .kr(8)
24829 .sr(1)
24830 .m(3)
24831 .n(4)
24832 .k(k)
24833 .a_zero_point(0)
24834 .b_zero_point(0)
24835 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24836 }
24837 }
24838#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24839
24840
24841#if XNN_ARCH_X86 || XNN_ARCH_X86_64
24842 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8) {
24843 TEST_REQUIRES_X86_SSE2;
24844 GemmMicrokernelTester()
24845 .mr(1)
24846 .nr(4)
24847 .kr(8)
24848 .sr(1)
24849 .m(1)
24850 .n(4)
24851 .k(8)
24852 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24853 }
24854
24855 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cn) {
24856 TEST_REQUIRES_X86_SSE2;
24857 GemmMicrokernelTester()
24858 .mr(1)
24859 .nr(4)
24860 .kr(8)
24861 .sr(1)
24862 .m(1)
24863 .n(4)
24864 .k(8)
24865 .cn_stride(7)
24866 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24867 }
24868
24869 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile) {
24870 TEST_REQUIRES_X86_SSE2;
24871 for (uint32_t m = 1; m <= 1; m++) {
24872 for (uint32_t n = 1; n <= 4; n++) {
24873 GemmMicrokernelTester()
24874 .mr(1)
24875 .nr(4)
24876 .kr(8)
24877 .sr(1)
24878 .m(m)
24879 .n(n)
24880 .k(8)
24881 .iterations(1)
24882 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24883 }
24884 }
24885 }
24886
24887 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile_m) {
24888 TEST_REQUIRES_X86_SSE2;
24889 for (uint32_t m = 1; m <= 1; m++) {
24890 GemmMicrokernelTester()
24891 .mr(1)
24892 .nr(4)
24893 .kr(8)
24894 .sr(1)
24895 .m(m)
24896 .n(4)
24897 .k(8)
24898 .iterations(1)
24899 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24900 }
24901 }
24902
24903 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile_n) {
24904 TEST_REQUIRES_X86_SSE2;
24905 for (uint32_t n = 1; n <= 4; n++) {
24906 GemmMicrokernelTester()
24907 .mr(1)
24908 .nr(4)
24909 .kr(8)
24910 .sr(1)
24911 .m(1)
24912 .n(n)
24913 .k(8)
24914 .iterations(1)
24915 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24916 }
24917 }
24918
24919 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_lt_8) {
24920 TEST_REQUIRES_X86_SSE2;
24921 for (size_t k = 1; k < 8; k++) {
24922 GemmMicrokernelTester()
24923 .mr(1)
24924 .nr(4)
24925 .kr(8)
24926 .sr(1)
24927 .m(1)
24928 .n(4)
24929 .k(k)
24930 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24931 }
24932 }
24933
24934 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_lt_8_subtile) {
24935 TEST_REQUIRES_X86_SSE2;
24936 for (size_t k = 1; k < 8; k++) {
24937 for (uint32_t m = 1; m <= 1; m++) {
24938 for (uint32_t n = 1; n <= 4; n++) {
24939 GemmMicrokernelTester()
24940 .mr(1)
24941 .nr(4)
24942 .kr(8)
24943 .sr(1)
24944 .m(m)
24945 .n(n)
24946 .k(k)
24947 .iterations(1)
24948 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24949 }
24950 }
24951 }
24952 }
24953
24954 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_gt_8) {
24955 TEST_REQUIRES_X86_SSE2;
24956 for (size_t k = 9; k < 16; k++) {
24957 GemmMicrokernelTester()
24958 .mr(1)
24959 .nr(4)
24960 .kr(8)
24961 .sr(1)
24962 .m(1)
24963 .n(4)
24964 .k(k)
24965 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24966 }
24967 }
24968
24969 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_gt_8_subtile) {
24970 TEST_REQUIRES_X86_SSE2;
24971 for (size_t k = 9; k < 16; k++) {
24972 for (uint32_t m = 1; m <= 1; m++) {
24973 for (uint32_t n = 1; n <= 4; n++) {
24974 GemmMicrokernelTester()
24975 .mr(1)
24976 .nr(4)
24977 .kr(8)
24978 .sr(1)
24979 .m(m)
24980 .n(n)
24981 .k(k)
24982 .iterations(1)
24983 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
24984 }
24985 }
24986 }
24987 }
24988
24989 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_div_8) {
24990 TEST_REQUIRES_X86_SSE2;
24991 for (size_t k = 16; k <= 80; k += 8) {
24992 GemmMicrokernelTester()
24993 .mr(1)
24994 .nr(4)
24995 .kr(8)
24996 .sr(1)
24997 .m(1)
24998 .n(4)
24999 .k(k)
25000 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25001 }
25002 }
25003
25004 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_div_8_subtile) {
25005 TEST_REQUIRES_X86_SSE2;
25006 for (size_t k = 16; k <= 80; k += 8) {
25007 for (uint32_t m = 1; m <= 1; m++) {
25008 for (uint32_t n = 1; n <= 4; n++) {
25009 GemmMicrokernelTester()
25010 .mr(1)
25011 .nr(4)
25012 .kr(8)
25013 .sr(1)
25014 .m(m)
25015 .n(n)
25016 .k(k)
25017 .iterations(1)
25018 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25019 }
25020 }
25021 }
25022 }
25023
25024 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4) {
25025 TEST_REQUIRES_X86_SSE2;
25026 for (uint32_t n = 5; n < 8; n++) {
25027 for (size_t k = 1; k <= 40; k += 9) {
25028 GemmMicrokernelTester()
25029 .mr(1)
25030 .nr(4)
25031 .kr(8)
25032 .sr(1)
25033 .m(1)
25034 .n(4)
25035 .k(k)
25036 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25037 }
25038 }
25039 }
25040
25041 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_strided_cn) {
25042 TEST_REQUIRES_X86_SSE2;
25043 for (uint32_t n = 5; n < 8; n++) {
25044 for (size_t k = 1; k <= 40; k += 9) {
25045 GemmMicrokernelTester()
25046 .mr(1)
25047 .nr(4)
25048 .kr(8)
25049 .sr(1)
25050 .m(1)
25051 .n(4)
25052 .k(k)
25053 .cn_stride(7)
25054 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25055 }
25056 }
25057 }
25058
25059 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_subtile) {
25060 TEST_REQUIRES_X86_SSE2;
25061 for (uint32_t n = 5; n < 8; n++) {
25062 for (size_t k = 1; k <= 40; k += 9) {
25063 for (uint32_t m = 1; m <= 1; m++) {
25064 GemmMicrokernelTester()
25065 .mr(1)
25066 .nr(4)
25067 .kr(8)
25068 .sr(1)
25069 .m(m)
25070 .n(n)
25071 .k(k)
25072 .iterations(1)
25073 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25074 }
25075 }
25076 }
25077 }
25078
25079 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4) {
25080 TEST_REQUIRES_X86_SSE2;
25081 for (uint32_t n = 8; n <= 12; n += 4) {
25082 for (size_t k = 1; k <= 40; k += 9) {
25083 GemmMicrokernelTester()
25084 .mr(1)
25085 .nr(4)
25086 .kr(8)
25087 .sr(1)
25088 .m(1)
25089 .n(4)
25090 .k(k)
25091 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25092 }
25093 }
25094 }
25095
25096 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_strided_cn) {
25097 TEST_REQUIRES_X86_SSE2;
25098 for (uint32_t n = 8; n <= 12; n += 4) {
25099 for (size_t k = 1; k <= 40; k += 9) {
25100 GemmMicrokernelTester()
25101 .mr(1)
25102 .nr(4)
25103 .kr(8)
25104 .sr(1)
25105 .m(1)
25106 .n(n)
25107 .k(k)
25108 .cn_stride(7)
25109 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25110 }
25111 }
25112 }
25113
25114 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_subtile) {
25115 TEST_REQUIRES_X86_SSE2;
25116 for (uint32_t n = 8; n <= 12; n += 4) {
25117 for (size_t k = 1; k <= 40; k += 9) {
25118 for (uint32_t m = 1; m <= 1; m++) {
25119 GemmMicrokernelTester()
25120 .mr(1)
25121 .nr(4)
25122 .kr(8)
25123 .sr(1)
25124 .m(m)
25125 .n(n)
25126 .k(k)
25127 .iterations(1)
25128 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25129 }
25130 }
25131 }
25132 }
25133
25134 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, small_kernel) {
25135 TEST_REQUIRES_X86_SSE2;
25136 for (size_t k = 1; k <= 40; k += 9) {
25137 GemmMicrokernelTester()
25138 .mr(1)
25139 .nr(4)
25140 .kr(8)
25141 .sr(1)
25142 .m(1)
25143 .n(4)
25144 .k(k)
25145 .ks(3)
25146 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25147 }
25148 }
25149
25150 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, small_kernel_subtile) {
25151 TEST_REQUIRES_X86_SSE2;
25152 for (size_t k = 1; k <= 40; k += 9) {
25153 for (uint32_t m = 1; m <= 1; m++) {
25154 for (uint32_t n = 1; n <= 4; n++) {
25155 GemmMicrokernelTester()
25156 .mr(1)
25157 .nr(4)
25158 .kr(8)
25159 .sr(1)
25160 .m(m)
25161 .n(n)
25162 .k(k)
25163 .ks(3)
25164 .iterations(1)
25165 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25166 }
25167 }
25168 }
25169 }
25170
25171 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_small_kernel) {
25172 TEST_REQUIRES_X86_SSE2;
25173 for (uint32_t n = 5; n < 8; n++) {
25174 for (size_t k = 1; k <= 40; k += 9) {
25175 GemmMicrokernelTester()
25176 .mr(1)
25177 .nr(4)
25178 .kr(8)
25179 .sr(1)
25180 .m(1)
25181 .n(4)
25182 .k(k)
25183 .ks(3)
25184 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25185 }
25186 }
25187 }
25188
25189 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_small_kernel) {
25190 TEST_REQUIRES_X86_SSE2;
25191 for (uint32_t n = 8; n <= 12; n += 4) {
25192 for (size_t k = 1; k <= 40; k += 9) {
25193 GemmMicrokernelTester()
25194 .mr(1)
25195 .nr(4)
25196 .kr(8)
25197 .sr(1)
25198 .m(1)
25199 .n(4)
25200 .k(k)
25201 .ks(3)
25202 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25203 }
25204 }
25205 }
25206
25207 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cm_subtile) {
25208 TEST_REQUIRES_X86_SSE2;
25209 for (size_t k = 1; k <= 40; k += 9) {
25210 for (uint32_t m = 1; m <= 1; m++) {
25211 for (uint32_t n = 1; n <= 4; n++) {
25212 GemmMicrokernelTester()
25213 .mr(1)
25214 .nr(4)
25215 .kr(8)
25216 .sr(1)
25217 .m(m)
25218 .n(n)
25219 .k(k)
25220 .cm_stride(7)
25221 .iterations(1)
25222 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25223 }
25224 }
25225 }
25226 }
25227
25228 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, a_offset) {
25229 TEST_REQUIRES_X86_SSE2;
25230 for (size_t k = 1; k <= 40; k += 9) {
25231 GemmMicrokernelTester()
25232 .mr(1)
25233 .nr(4)
25234 .kr(8)
25235 .sr(1)
25236 .m(1)
25237 .n(4)
25238 .k(k)
25239 .ks(3)
25240 .a_offset(43)
25241 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25242 }
25243 }
25244
25245 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, zero) {
25246 TEST_REQUIRES_X86_SSE2;
25247 for (uint32_t mz = 0; mz < 1; mz++) {
25248 for (size_t k = 1; k <= 40; k += 9) {
25249 GemmMicrokernelTester()
25250 .mr(1)
25251 .nr(4)
25252 .kr(8)
25253 .sr(1)
25254 .m(1)
25255 .n(4)
25256 .k(k)
25257 .ks(3)
25258 .a_offset(43)
25259 .zero_index(mz)
25260 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25261 }
25262 }
25263 }
25264
25265 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, qmin) {
25266 TEST_REQUIRES_X86_SSE2;
25267 GemmMicrokernelTester()
25268 .mr(1)
25269 .nr(4)
25270 .kr(8)
25271 .sr(1)
25272 .m(1)
25273 .n(4)
25274 .k(8)
25275 .qmin(128)
25276 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25277 }
25278
25279 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, qmax) {
25280 TEST_REQUIRES_X86_SSE2;
25281 GemmMicrokernelTester()
25282 .mr(1)
25283 .nr(4)
25284 .kr(8)
25285 .sr(1)
25286 .m(1)
25287 .n(4)
25288 .k(8)
25289 .qmax(128)
25290 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25291 }
25292
25293 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cm) {
25294 TEST_REQUIRES_X86_SSE2;
25295 GemmMicrokernelTester()
25296 .mr(1)
25297 .nr(4)
25298 .kr(8)
25299 .sr(1)
25300 .m(1)
25301 .n(4)
25302 .k(8)
25303 .cm_stride(7)
25304 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25305 }
25306
25307 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, no_a_zero_point) {
25308 TEST_REQUIRES_X86_SSE2;
25309 for (size_t k = 1; k <= 40; k += 9) {
25310 GemmMicrokernelTester()
25311 .mr(1)
25312 .nr(4)
25313 .kr(8)
25314 .sr(1)
25315 .m(1)
25316 .n(4)
25317 .k(k)
25318 .a_zero_point(0)
25319 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25320 }
25321 }
25322
25323 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, no_b_zero_point) {
25324 TEST_REQUIRES_X86_SSE2;
25325 for (size_t k = 1; k <= 40; k += 9) {
25326 GemmMicrokernelTester()
25327 .mr(1)
25328 .nr(4)
25329 .kr(8)
25330 .sr(1)
25331 .m(1)
25332 .n(4)
25333 .k(k)
25334 .b_zero_point(0)
25335 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25336 }
25337 }
25338
25339 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, no_zero_point) {
25340 TEST_REQUIRES_X86_SSE2;
25341 for (size_t k = 1; k <= 40; k += 9) {
25342 GemmMicrokernelTester()
25343 .mr(1)
25344 .nr(4)
25345 .kr(8)
25346 .sr(1)
25347 .m(1)
25348 .n(4)
25349 .k(k)
25350 .a_zero_point(0)
25351 .b_zero_point(0)
25352 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25353 }
25354 }
25355#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25356
25357
25358#if XNN_ARCH_X86 || XNN_ARCH_X86_64
25359 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8) {
25360 TEST_REQUIRES_X86_SSE2;
25361 GemmMicrokernelTester()
25362 .mr(2)
25363 .nr(4)
25364 .kr(8)
25365 .sr(1)
25366 .m(2)
25367 .n(4)
25368 .k(8)
25369 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25370 }
25371
25372 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cn) {
25373 TEST_REQUIRES_X86_SSE2;
25374 GemmMicrokernelTester()
25375 .mr(2)
25376 .nr(4)
25377 .kr(8)
25378 .sr(1)
25379 .m(2)
25380 .n(4)
25381 .k(8)
25382 .cn_stride(7)
25383 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25384 }
25385
25386 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile) {
25387 TEST_REQUIRES_X86_SSE2;
25388 for (uint32_t m = 1; m <= 2; m++) {
25389 for (uint32_t n = 1; n <= 4; n++) {
25390 GemmMicrokernelTester()
25391 .mr(2)
25392 .nr(4)
25393 .kr(8)
25394 .sr(1)
25395 .m(m)
25396 .n(n)
25397 .k(8)
25398 .iterations(1)
25399 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25400 }
25401 }
25402 }
25403
25404 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile_m) {
25405 TEST_REQUIRES_X86_SSE2;
25406 for (uint32_t m = 1; m <= 2; m++) {
25407 GemmMicrokernelTester()
25408 .mr(2)
25409 .nr(4)
25410 .kr(8)
25411 .sr(1)
25412 .m(m)
25413 .n(4)
25414 .k(8)
25415 .iterations(1)
25416 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25417 }
25418 }
25419
25420 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile_n) {
25421 TEST_REQUIRES_X86_SSE2;
25422 for (uint32_t n = 1; n <= 4; n++) {
25423 GemmMicrokernelTester()
25424 .mr(2)
25425 .nr(4)
25426 .kr(8)
25427 .sr(1)
25428 .m(2)
25429 .n(n)
25430 .k(8)
25431 .iterations(1)
25432 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25433 }
25434 }
25435
25436 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_lt_8) {
25437 TEST_REQUIRES_X86_SSE2;
25438 for (size_t k = 1; k < 8; k++) {
25439 GemmMicrokernelTester()
25440 .mr(2)
25441 .nr(4)
25442 .kr(8)
25443 .sr(1)
25444 .m(2)
25445 .n(4)
25446 .k(k)
25447 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25448 }
25449 }
25450
25451 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_lt_8_subtile) {
25452 TEST_REQUIRES_X86_SSE2;
25453 for (size_t k = 1; k < 8; k++) {
25454 for (uint32_t m = 1; m <= 2; m++) {
25455 for (uint32_t n = 1; n <= 4; n++) {
25456 GemmMicrokernelTester()
25457 .mr(2)
25458 .nr(4)
25459 .kr(8)
25460 .sr(1)
25461 .m(m)
25462 .n(n)
25463 .k(k)
25464 .iterations(1)
25465 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25466 }
25467 }
25468 }
25469 }
25470
25471 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_gt_8) {
25472 TEST_REQUIRES_X86_SSE2;
25473 for (size_t k = 9; k < 16; k++) {
25474 GemmMicrokernelTester()
25475 .mr(2)
25476 .nr(4)
25477 .kr(8)
25478 .sr(1)
25479 .m(2)
25480 .n(4)
25481 .k(k)
25482 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25483 }
25484 }
25485
25486 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_gt_8_subtile) {
25487 TEST_REQUIRES_X86_SSE2;
25488 for (size_t k = 9; k < 16; k++) {
25489 for (uint32_t m = 1; m <= 2; m++) {
25490 for (uint32_t n = 1; n <= 4; n++) {
25491 GemmMicrokernelTester()
25492 .mr(2)
25493 .nr(4)
25494 .kr(8)
25495 .sr(1)
25496 .m(m)
25497 .n(n)
25498 .k(k)
25499 .iterations(1)
25500 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25501 }
25502 }
25503 }
25504 }
25505
25506 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_div_8) {
25507 TEST_REQUIRES_X86_SSE2;
25508 for (size_t k = 16; k <= 80; k += 8) {
25509 GemmMicrokernelTester()
25510 .mr(2)
25511 .nr(4)
25512 .kr(8)
25513 .sr(1)
25514 .m(2)
25515 .n(4)
25516 .k(k)
25517 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25518 }
25519 }
25520
25521 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_div_8_subtile) {
25522 TEST_REQUIRES_X86_SSE2;
25523 for (size_t k = 16; k <= 80; k += 8) {
25524 for (uint32_t m = 1; m <= 2; m++) {
25525 for (uint32_t n = 1; n <= 4; n++) {
25526 GemmMicrokernelTester()
25527 .mr(2)
25528 .nr(4)
25529 .kr(8)
25530 .sr(1)
25531 .m(m)
25532 .n(n)
25533 .k(k)
25534 .iterations(1)
25535 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25536 }
25537 }
25538 }
25539 }
25540
25541 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4) {
25542 TEST_REQUIRES_X86_SSE2;
25543 for (uint32_t n = 5; n < 8; n++) {
25544 for (size_t k = 1; k <= 40; k += 9) {
25545 GemmMicrokernelTester()
25546 .mr(2)
25547 .nr(4)
25548 .kr(8)
25549 .sr(1)
25550 .m(2)
25551 .n(4)
25552 .k(k)
25553 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25554 }
25555 }
25556 }
25557
25558 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_strided_cn) {
25559 TEST_REQUIRES_X86_SSE2;
25560 for (uint32_t n = 5; n < 8; n++) {
25561 for (size_t k = 1; k <= 40; k += 9) {
25562 GemmMicrokernelTester()
25563 .mr(2)
25564 .nr(4)
25565 .kr(8)
25566 .sr(1)
25567 .m(2)
25568 .n(4)
25569 .k(k)
25570 .cn_stride(7)
25571 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25572 }
25573 }
25574 }
25575
25576 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_subtile) {
25577 TEST_REQUIRES_X86_SSE2;
25578 for (uint32_t n = 5; n < 8; n++) {
25579 for (size_t k = 1; k <= 40; k += 9) {
25580 for (uint32_t m = 1; m <= 2; m++) {
25581 GemmMicrokernelTester()
25582 .mr(2)
25583 .nr(4)
25584 .kr(8)
25585 .sr(1)
25586 .m(m)
25587 .n(n)
25588 .k(k)
25589 .iterations(1)
25590 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25591 }
25592 }
25593 }
25594 }
25595
25596 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4) {
25597 TEST_REQUIRES_X86_SSE2;
25598 for (uint32_t n = 8; n <= 12; n += 4) {
25599 for (size_t k = 1; k <= 40; k += 9) {
25600 GemmMicrokernelTester()
25601 .mr(2)
25602 .nr(4)
25603 .kr(8)
25604 .sr(1)
25605 .m(2)
25606 .n(4)
25607 .k(k)
25608 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25609 }
25610 }
25611 }
25612
25613 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_strided_cn) {
25614 TEST_REQUIRES_X86_SSE2;
25615 for (uint32_t n = 8; n <= 12; n += 4) {
25616 for (size_t k = 1; k <= 40; k += 9) {
25617 GemmMicrokernelTester()
25618 .mr(2)
25619 .nr(4)
25620 .kr(8)
25621 .sr(1)
25622 .m(2)
25623 .n(n)
25624 .k(k)
25625 .cn_stride(7)
25626 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25627 }
25628 }
25629 }
25630
25631 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_subtile) {
25632 TEST_REQUIRES_X86_SSE2;
25633 for (uint32_t n = 8; n <= 12; n += 4) {
25634 for (size_t k = 1; k <= 40; k += 9) {
25635 for (uint32_t m = 1; m <= 2; m++) {
25636 GemmMicrokernelTester()
25637 .mr(2)
25638 .nr(4)
25639 .kr(8)
25640 .sr(1)
25641 .m(m)
25642 .n(n)
25643 .k(k)
25644 .iterations(1)
25645 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25646 }
25647 }
25648 }
25649 }
25650
25651 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, small_kernel) {
25652 TEST_REQUIRES_X86_SSE2;
25653 for (size_t k = 1; k <= 40; k += 9) {
25654 GemmMicrokernelTester()
25655 .mr(2)
25656 .nr(4)
25657 .kr(8)
25658 .sr(1)
25659 .m(2)
25660 .n(4)
25661 .k(k)
25662 .ks(3)
25663 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25664 }
25665 }
25666
25667 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, small_kernel_subtile) {
25668 TEST_REQUIRES_X86_SSE2;
25669 for (size_t k = 1; k <= 40; k += 9) {
25670 for (uint32_t m = 1; m <= 2; m++) {
25671 for (uint32_t n = 1; n <= 4; n++) {
25672 GemmMicrokernelTester()
25673 .mr(2)
25674 .nr(4)
25675 .kr(8)
25676 .sr(1)
25677 .m(m)
25678 .n(n)
25679 .k(k)
25680 .ks(3)
25681 .iterations(1)
25682 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25683 }
25684 }
25685 }
25686 }
25687
25688 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_small_kernel) {
25689 TEST_REQUIRES_X86_SSE2;
25690 for (uint32_t n = 5; n < 8; n++) {
25691 for (size_t k = 1; k <= 40; k += 9) {
25692 GemmMicrokernelTester()
25693 .mr(2)
25694 .nr(4)
25695 .kr(8)
25696 .sr(1)
25697 .m(2)
25698 .n(4)
25699 .k(k)
25700 .ks(3)
25701 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25702 }
25703 }
25704 }
25705
25706 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_small_kernel) {
25707 TEST_REQUIRES_X86_SSE2;
25708 for (uint32_t n = 8; n <= 12; n += 4) {
25709 for (size_t k = 1; k <= 40; k += 9) {
25710 GemmMicrokernelTester()
25711 .mr(2)
25712 .nr(4)
25713 .kr(8)
25714 .sr(1)
25715 .m(2)
25716 .n(4)
25717 .k(k)
25718 .ks(3)
25719 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25720 }
25721 }
25722 }
25723
25724 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cm_subtile) {
25725 TEST_REQUIRES_X86_SSE2;
25726 for (size_t k = 1; k <= 40; k += 9) {
25727 for (uint32_t m = 1; m <= 2; m++) {
25728 for (uint32_t n = 1; n <= 4; n++) {
25729 GemmMicrokernelTester()
25730 .mr(2)
25731 .nr(4)
25732 .kr(8)
25733 .sr(1)
25734 .m(m)
25735 .n(n)
25736 .k(k)
25737 .cm_stride(7)
25738 .iterations(1)
25739 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25740 }
25741 }
25742 }
25743 }
25744
25745 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, a_offset) {
25746 TEST_REQUIRES_X86_SSE2;
25747 for (size_t k = 1; k <= 40; k += 9) {
25748 GemmMicrokernelTester()
25749 .mr(2)
25750 .nr(4)
25751 .kr(8)
25752 .sr(1)
25753 .m(2)
25754 .n(4)
25755 .k(k)
25756 .ks(3)
25757 .a_offset(83)
25758 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25759 }
25760 }
25761
25762 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, zero) {
25763 TEST_REQUIRES_X86_SSE2;
25764 for (uint32_t mz = 0; mz < 2; mz++) {
25765 for (size_t k = 1; k <= 40; k += 9) {
25766 GemmMicrokernelTester()
25767 .mr(2)
25768 .nr(4)
25769 .kr(8)
25770 .sr(1)
25771 .m(2)
25772 .n(4)
25773 .k(k)
25774 .ks(3)
25775 .a_offset(83)
25776 .zero_index(mz)
25777 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25778 }
25779 }
25780 }
25781
25782 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, qmin) {
25783 TEST_REQUIRES_X86_SSE2;
25784 GemmMicrokernelTester()
25785 .mr(2)
25786 .nr(4)
25787 .kr(8)
25788 .sr(1)
25789 .m(2)
25790 .n(4)
25791 .k(8)
25792 .qmin(128)
25793 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25794 }
25795
25796 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, qmax) {
25797 TEST_REQUIRES_X86_SSE2;
25798 GemmMicrokernelTester()
25799 .mr(2)
25800 .nr(4)
25801 .kr(8)
25802 .sr(1)
25803 .m(2)
25804 .n(4)
25805 .k(8)
25806 .qmax(128)
25807 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25808 }
25809
25810 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cm) {
25811 TEST_REQUIRES_X86_SSE2;
25812 GemmMicrokernelTester()
25813 .mr(2)
25814 .nr(4)
25815 .kr(8)
25816 .sr(1)
25817 .m(2)
25818 .n(4)
25819 .k(8)
25820 .cm_stride(7)
25821 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25822 }
25823
25824 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, no_a_zero_point) {
25825 TEST_REQUIRES_X86_SSE2;
25826 for (size_t k = 1; k <= 40; k += 9) {
25827 GemmMicrokernelTester()
25828 .mr(2)
25829 .nr(4)
25830 .kr(8)
25831 .sr(1)
25832 .m(2)
25833 .n(4)
25834 .k(k)
25835 .a_zero_point(0)
25836 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25837 }
25838 }
25839
25840 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, no_b_zero_point) {
25841 TEST_REQUIRES_X86_SSE2;
25842 for (size_t k = 1; k <= 40; k += 9) {
25843 GemmMicrokernelTester()
25844 .mr(2)
25845 .nr(4)
25846 .kr(8)
25847 .sr(1)
25848 .m(2)
25849 .n(4)
25850 .k(k)
25851 .b_zero_point(0)
25852 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25853 }
25854 }
25855
25856 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, no_zero_point) {
25857 TEST_REQUIRES_X86_SSE2;
25858 for (size_t k = 1; k <= 40; k += 9) {
25859 GemmMicrokernelTester()
25860 .mr(2)
25861 .nr(4)
25862 .kr(8)
25863 .sr(1)
25864 .m(2)
25865 .n(4)
25866 .k(k)
25867 .a_zero_point(0)
25868 .b_zero_point(0)
25869 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25870 }
25871 }
25872#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25873
25874
25875#if XNN_ARCH_X86 || XNN_ARCH_X86_64
25876 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8) {
25877 TEST_REQUIRES_X86_SSE2;
25878 GemmMicrokernelTester()
25879 .mr(3)
25880 .nr(4)
25881 .kr(8)
25882 .sr(1)
25883 .m(3)
25884 .n(4)
25885 .k(8)
25886 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25887 }
25888
25889 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, strided_cn) {
25890 TEST_REQUIRES_X86_SSE2;
25891 GemmMicrokernelTester()
25892 .mr(3)
25893 .nr(4)
25894 .kr(8)
25895 .sr(1)
25896 .m(3)
25897 .n(4)
25898 .k(8)
25899 .cn_stride(7)
25900 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25901 }
25902
25903 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8_subtile) {
25904 TEST_REQUIRES_X86_SSE2;
25905 for (uint32_t m = 1; m <= 3; m++) {
25906 for (uint32_t n = 1; n <= 4; n++) {
25907 GemmMicrokernelTester()
25908 .mr(3)
25909 .nr(4)
25910 .kr(8)
25911 .sr(1)
25912 .m(m)
25913 .n(n)
25914 .k(8)
25915 .iterations(1)
25916 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25917 }
25918 }
25919 }
25920
25921 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8_subtile_m) {
25922 TEST_REQUIRES_X86_SSE2;
25923 for (uint32_t m = 1; m <= 3; m++) {
25924 GemmMicrokernelTester()
25925 .mr(3)
25926 .nr(4)
25927 .kr(8)
25928 .sr(1)
25929 .m(m)
25930 .n(4)
25931 .k(8)
25932 .iterations(1)
25933 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25934 }
25935 }
25936
25937 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8_subtile_n) {
25938 TEST_REQUIRES_X86_SSE2;
25939 for (uint32_t n = 1; n <= 4; n++) {
25940 GemmMicrokernelTester()
25941 .mr(3)
25942 .nr(4)
25943 .kr(8)
25944 .sr(1)
25945 .m(3)
25946 .n(n)
25947 .k(8)
25948 .iterations(1)
25949 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25950 }
25951 }
25952
25953 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_lt_8) {
25954 TEST_REQUIRES_X86_SSE2;
25955 for (size_t k = 1; k < 8; k++) {
25956 GemmMicrokernelTester()
25957 .mr(3)
25958 .nr(4)
25959 .kr(8)
25960 .sr(1)
25961 .m(3)
25962 .n(4)
25963 .k(k)
25964 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25965 }
25966 }
25967
25968 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_lt_8_subtile) {
25969 TEST_REQUIRES_X86_SSE2;
25970 for (size_t k = 1; k < 8; k++) {
25971 for (uint32_t m = 1; m <= 3; m++) {
25972 for (uint32_t n = 1; n <= 4; n++) {
25973 GemmMicrokernelTester()
25974 .mr(3)
25975 .nr(4)
25976 .kr(8)
25977 .sr(1)
25978 .m(m)
25979 .n(n)
25980 .k(k)
25981 .iterations(1)
25982 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
25983 }
25984 }
25985 }
25986 }
25987
25988 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_gt_8) {
25989 TEST_REQUIRES_X86_SSE2;
25990 for (size_t k = 9; k < 16; k++) {
25991 GemmMicrokernelTester()
25992 .mr(3)
25993 .nr(4)
25994 .kr(8)
25995 .sr(1)
25996 .m(3)
25997 .n(4)
25998 .k(k)
25999 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26000 }
26001 }
26002
26003 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_gt_8_subtile) {
26004 TEST_REQUIRES_X86_SSE2;
26005 for (size_t k = 9; k < 16; k++) {
26006 for (uint32_t m = 1; m <= 3; m++) {
26007 for (uint32_t n = 1; n <= 4; n++) {
26008 GemmMicrokernelTester()
26009 .mr(3)
26010 .nr(4)
26011 .kr(8)
26012 .sr(1)
26013 .m(m)
26014 .n(n)
26015 .k(k)
26016 .iterations(1)
26017 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26018 }
26019 }
26020 }
26021 }
26022
26023 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_div_8) {
26024 TEST_REQUIRES_X86_SSE2;
26025 for (size_t k = 16; k <= 80; k += 8) {
26026 GemmMicrokernelTester()
26027 .mr(3)
26028 .nr(4)
26029 .kr(8)
26030 .sr(1)
26031 .m(3)
26032 .n(4)
26033 .k(k)
26034 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26035 }
26036 }
26037
26038 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_div_8_subtile) {
26039 TEST_REQUIRES_X86_SSE2;
26040 for (size_t k = 16; k <= 80; k += 8) {
26041 for (uint32_t m = 1; m <= 3; m++) {
26042 for (uint32_t n = 1; n <= 4; n++) {
26043 GemmMicrokernelTester()
26044 .mr(3)
26045 .nr(4)
26046 .kr(8)
26047 .sr(1)
26048 .m(m)
26049 .n(n)
26050 .k(k)
26051 .iterations(1)
26052 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26053 }
26054 }
26055 }
26056 }
26057
26058 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4) {
26059 TEST_REQUIRES_X86_SSE2;
26060 for (uint32_t n = 5; n < 8; n++) {
26061 for (size_t k = 1; k <= 40; k += 9) {
26062 GemmMicrokernelTester()
26063 .mr(3)
26064 .nr(4)
26065 .kr(8)
26066 .sr(1)
26067 .m(3)
26068 .n(4)
26069 .k(k)
26070 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26071 }
26072 }
26073 }
26074
26075 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4_strided_cn) {
26076 TEST_REQUIRES_X86_SSE2;
26077 for (uint32_t n = 5; n < 8; n++) {
26078 for (size_t k = 1; k <= 40; k += 9) {
26079 GemmMicrokernelTester()
26080 .mr(3)
26081 .nr(4)
26082 .kr(8)
26083 .sr(1)
26084 .m(3)
26085 .n(4)
26086 .k(k)
26087 .cn_stride(7)
26088 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26089 }
26090 }
26091 }
26092
26093 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4_subtile) {
26094 TEST_REQUIRES_X86_SSE2;
26095 for (uint32_t n = 5; n < 8; n++) {
26096 for (size_t k = 1; k <= 40; k += 9) {
26097 for (uint32_t m = 1; m <= 3; m++) {
26098 GemmMicrokernelTester()
26099 .mr(3)
26100 .nr(4)
26101 .kr(8)
26102 .sr(1)
26103 .m(m)
26104 .n(n)
26105 .k(k)
26106 .iterations(1)
26107 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26108 }
26109 }
26110 }
26111 }
26112
26113 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4) {
26114 TEST_REQUIRES_X86_SSE2;
26115 for (uint32_t n = 8; n <= 12; n += 4) {
26116 for (size_t k = 1; k <= 40; k += 9) {
26117 GemmMicrokernelTester()
26118 .mr(3)
26119 .nr(4)
26120 .kr(8)
26121 .sr(1)
26122 .m(3)
26123 .n(4)
26124 .k(k)
26125 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26126 }
26127 }
26128 }
26129
26130 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4_strided_cn) {
26131 TEST_REQUIRES_X86_SSE2;
26132 for (uint32_t n = 8; n <= 12; n += 4) {
26133 for (size_t k = 1; k <= 40; k += 9) {
26134 GemmMicrokernelTester()
26135 .mr(3)
26136 .nr(4)
26137 .kr(8)
26138 .sr(1)
26139 .m(3)
26140 .n(n)
26141 .k(k)
26142 .cn_stride(7)
26143 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26144 }
26145 }
26146 }
26147
26148 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4_subtile) {
26149 TEST_REQUIRES_X86_SSE2;
26150 for (uint32_t n = 8; n <= 12; n += 4) {
26151 for (size_t k = 1; k <= 40; k += 9) {
26152 for (uint32_t m = 1; m <= 3; m++) {
26153 GemmMicrokernelTester()
26154 .mr(3)
26155 .nr(4)
26156 .kr(8)
26157 .sr(1)
26158 .m(m)
26159 .n(n)
26160 .k(k)
26161 .iterations(1)
26162 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26163 }
26164 }
26165 }
26166 }
26167
26168 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, small_kernel) {
26169 TEST_REQUIRES_X86_SSE2;
26170 for (size_t k = 1; k <= 40; k += 9) {
26171 GemmMicrokernelTester()
26172 .mr(3)
26173 .nr(4)
26174 .kr(8)
26175 .sr(1)
26176 .m(3)
26177 .n(4)
26178 .k(k)
26179 .ks(3)
26180 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26181 }
26182 }
26183
26184 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, small_kernel_subtile) {
26185 TEST_REQUIRES_X86_SSE2;
26186 for (size_t k = 1; k <= 40; k += 9) {
26187 for (uint32_t m = 1; m <= 3; m++) {
26188 for (uint32_t n = 1; n <= 4; n++) {
26189 GemmMicrokernelTester()
26190 .mr(3)
26191 .nr(4)
26192 .kr(8)
26193 .sr(1)
26194 .m(m)
26195 .n(n)
26196 .k(k)
26197 .ks(3)
26198 .iterations(1)
26199 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26200 }
26201 }
26202 }
26203 }
26204
26205 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4_small_kernel) {
26206 TEST_REQUIRES_X86_SSE2;
26207 for (uint32_t n = 5; n < 8; n++) {
26208 for (size_t k = 1; k <= 40; k += 9) {
26209 GemmMicrokernelTester()
26210 .mr(3)
26211 .nr(4)
26212 .kr(8)
26213 .sr(1)
26214 .m(3)
26215 .n(4)
26216 .k(k)
26217 .ks(3)
26218 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26219 }
26220 }
26221 }
26222
26223 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4_small_kernel) {
26224 TEST_REQUIRES_X86_SSE2;
26225 for (uint32_t n = 8; n <= 12; n += 4) {
26226 for (size_t k = 1; k <= 40; k += 9) {
26227 GemmMicrokernelTester()
26228 .mr(3)
26229 .nr(4)
26230 .kr(8)
26231 .sr(1)
26232 .m(3)
26233 .n(4)
26234 .k(k)
26235 .ks(3)
26236 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26237 }
26238 }
26239 }
26240
26241 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, strided_cm_subtile) {
26242 TEST_REQUIRES_X86_SSE2;
26243 for (size_t k = 1; k <= 40; k += 9) {
26244 for (uint32_t m = 1; m <= 3; m++) {
26245 for (uint32_t n = 1; n <= 4; n++) {
26246 GemmMicrokernelTester()
26247 .mr(3)
26248 .nr(4)
26249 .kr(8)
26250 .sr(1)
26251 .m(m)
26252 .n(n)
26253 .k(k)
26254 .cm_stride(7)
26255 .iterations(1)
26256 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26257 }
26258 }
26259 }
26260 }
26261
26262 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, a_offset) {
26263 TEST_REQUIRES_X86_SSE2;
26264 for (size_t k = 1; k <= 40; k += 9) {
26265 GemmMicrokernelTester()
26266 .mr(3)
26267 .nr(4)
26268 .kr(8)
26269 .sr(1)
26270 .m(3)
26271 .n(4)
26272 .k(k)
26273 .ks(3)
26274 .a_offset(127)
26275 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26276 }
26277 }
26278
26279 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, zero) {
26280 TEST_REQUIRES_X86_SSE2;
26281 for (uint32_t mz = 0; mz < 3; mz++) {
26282 for (size_t k = 1; k <= 40; k += 9) {
26283 GemmMicrokernelTester()
26284 .mr(3)
26285 .nr(4)
26286 .kr(8)
26287 .sr(1)
26288 .m(3)
26289 .n(4)
26290 .k(k)
26291 .ks(3)
26292 .a_offset(127)
26293 .zero_index(mz)
26294 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26295 }
26296 }
26297 }
26298
26299 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, qmin) {
26300 TEST_REQUIRES_X86_SSE2;
26301 GemmMicrokernelTester()
26302 .mr(3)
26303 .nr(4)
26304 .kr(8)
26305 .sr(1)
26306 .m(3)
26307 .n(4)
26308 .k(8)
26309 .qmin(128)
26310 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26311 }
26312
26313 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, qmax) {
26314 TEST_REQUIRES_X86_SSE2;
26315 GemmMicrokernelTester()
26316 .mr(3)
26317 .nr(4)
26318 .kr(8)
26319 .sr(1)
26320 .m(3)
26321 .n(4)
26322 .k(8)
26323 .qmax(128)
26324 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26325 }
26326
26327 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, strided_cm) {
26328 TEST_REQUIRES_X86_SSE2;
26329 GemmMicrokernelTester()
26330 .mr(3)
26331 .nr(4)
26332 .kr(8)
26333 .sr(1)
26334 .m(3)
26335 .n(4)
26336 .k(8)
26337 .cm_stride(7)
26338 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26339 }
26340
26341 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, no_a_zero_point) {
26342 TEST_REQUIRES_X86_SSE2;
26343 for (size_t k = 1; k <= 40; k += 9) {
26344 GemmMicrokernelTester()
26345 .mr(3)
26346 .nr(4)
26347 .kr(8)
26348 .sr(1)
26349 .m(3)
26350 .n(4)
26351 .k(k)
26352 .a_zero_point(0)
26353 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26354 }
26355 }
26356
26357 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, no_b_zero_point) {
26358 TEST_REQUIRES_X86_SSE2;
26359 for (size_t k = 1; k <= 40; k += 9) {
26360 GemmMicrokernelTester()
26361 .mr(3)
26362 .nr(4)
26363 .kr(8)
26364 .sr(1)
26365 .m(3)
26366 .n(4)
26367 .k(k)
26368 .b_zero_point(0)
26369 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26370 }
26371 }
26372
26373 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, no_zero_point) {
26374 TEST_REQUIRES_X86_SSE2;
26375 for (size_t k = 1; k <= 40; k += 9) {
26376 GemmMicrokernelTester()
26377 .mr(3)
26378 .nr(4)
26379 .kr(8)
26380 .sr(1)
26381 .m(3)
26382 .n(4)
26383 .k(k)
26384 .a_zero_point(0)
26385 .b_zero_point(0)
26386 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26387 }
26388 }
26389#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26390
26391
26392#if XNN_ARCH_X86 || XNN_ARCH_X86_64
26393 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8) {
26394 TEST_REQUIRES_X86_SSE41;
26395 GemmMicrokernelTester()
26396 .mr(1)
26397 .nr(4)
26398 .kr(8)
26399 .sr(1)
26400 .m(1)
26401 .n(4)
26402 .k(8)
26403 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26404 }
26405
26406 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cn) {
26407 TEST_REQUIRES_X86_SSE41;
26408 GemmMicrokernelTester()
26409 .mr(1)
26410 .nr(4)
26411 .kr(8)
26412 .sr(1)
26413 .m(1)
26414 .n(4)
26415 .k(8)
26416 .cn_stride(7)
26417 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26418 }
26419
26420 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile) {
26421 TEST_REQUIRES_X86_SSE41;
26422 for (uint32_t m = 1; m <= 1; m++) {
26423 for (uint32_t n = 1; n <= 4; n++) {
26424 GemmMicrokernelTester()
26425 .mr(1)
26426 .nr(4)
26427 .kr(8)
26428 .sr(1)
26429 .m(m)
26430 .n(n)
26431 .k(8)
26432 .iterations(1)
26433 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26434 }
26435 }
26436 }
26437
26438 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile_m) {
26439 TEST_REQUIRES_X86_SSE41;
26440 for (uint32_t m = 1; m <= 1; m++) {
26441 GemmMicrokernelTester()
26442 .mr(1)
26443 .nr(4)
26444 .kr(8)
26445 .sr(1)
26446 .m(m)
26447 .n(4)
26448 .k(8)
26449 .iterations(1)
26450 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26451 }
26452 }
26453
26454 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile_n) {
26455 TEST_REQUIRES_X86_SSE41;
26456 for (uint32_t n = 1; n <= 4; n++) {
26457 GemmMicrokernelTester()
26458 .mr(1)
26459 .nr(4)
26460 .kr(8)
26461 .sr(1)
26462 .m(1)
26463 .n(n)
26464 .k(8)
26465 .iterations(1)
26466 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26467 }
26468 }
26469
26470 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_lt_8) {
26471 TEST_REQUIRES_X86_SSE41;
26472 for (size_t k = 1; k < 8; k++) {
26473 GemmMicrokernelTester()
26474 .mr(1)
26475 .nr(4)
26476 .kr(8)
26477 .sr(1)
26478 .m(1)
26479 .n(4)
26480 .k(k)
26481 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26482 }
26483 }
26484
26485 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_lt_8_subtile) {
26486 TEST_REQUIRES_X86_SSE41;
26487 for (size_t k = 1; k < 8; k++) {
26488 for (uint32_t m = 1; m <= 1; m++) {
26489 for (uint32_t n = 1; n <= 4; n++) {
26490 GemmMicrokernelTester()
26491 .mr(1)
26492 .nr(4)
26493 .kr(8)
26494 .sr(1)
26495 .m(m)
26496 .n(n)
26497 .k(k)
26498 .iterations(1)
26499 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26500 }
26501 }
26502 }
26503 }
26504
26505 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_gt_8) {
26506 TEST_REQUIRES_X86_SSE41;
26507 for (size_t k = 9; k < 16; k++) {
26508 GemmMicrokernelTester()
26509 .mr(1)
26510 .nr(4)
26511 .kr(8)
26512 .sr(1)
26513 .m(1)
26514 .n(4)
26515 .k(k)
26516 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26517 }
26518 }
26519
26520 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_gt_8_subtile) {
26521 TEST_REQUIRES_X86_SSE41;
26522 for (size_t k = 9; k < 16; k++) {
26523 for (uint32_t m = 1; m <= 1; m++) {
26524 for (uint32_t n = 1; n <= 4; n++) {
26525 GemmMicrokernelTester()
26526 .mr(1)
26527 .nr(4)
26528 .kr(8)
26529 .sr(1)
26530 .m(m)
26531 .n(n)
26532 .k(k)
26533 .iterations(1)
26534 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26535 }
26536 }
26537 }
26538 }
26539
26540 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_div_8) {
26541 TEST_REQUIRES_X86_SSE41;
26542 for (size_t k = 16; k <= 80; k += 8) {
26543 GemmMicrokernelTester()
26544 .mr(1)
26545 .nr(4)
26546 .kr(8)
26547 .sr(1)
26548 .m(1)
26549 .n(4)
26550 .k(k)
26551 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26552 }
26553 }
26554
26555 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_div_8_subtile) {
26556 TEST_REQUIRES_X86_SSE41;
26557 for (size_t k = 16; k <= 80; k += 8) {
26558 for (uint32_t m = 1; m <= 1; m++) {
26559 for (uint32_t n = 1; n <= 4; n++) {
26560 GemmMicrokernelTester()
26561 .mr(1)
26562 .nr(4)
26563 .kr(8)
26564 .sr(1)
26565 .m(m)
26566 .n(n)
26567 .k(k)
26568 .iterations(1)
26569 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26570 }
26571 }
26572 }
26573 }
26574
26575 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4) {
26576 TEST_REQUIRES_X86_SSE41;
26577 for (uint32_t n = 5; n < 8; n++) {
26578 for (size_t k = 1; k <= 40; k += 9) {
26579 GemmMicrokernelTester()
26580 .mr(1)
26581 .nr(4)
26582 .kr(8)
26583 .sr(1)
26584 .m(1)
26585 .n(4)
26586 .k(k)
26587 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26588 }
26589 }
26590 }
26591
26592 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_strided_cn) {
26593 TEST_REQUIRES_X86_SSE41;
26594 for (uint32_t n = 5; n < 8; n++) {
26595 for (size_t k = 1; k <= 40; k += 9) {
26596 GemmMicrokernelTester()
26597 .mr(1)
26598 .nr(4)
26599 .kr(8)
26600 .sr(1)
26601 .m(1)
26602 .n(4)
26603 .k(k)
26604 .cn_stride(7)
26605 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26606 }
26607 }
26608 }
26609
26610 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_subtile) {
26611 TEST_REQUIRES_X86_SSE41;
26612 for (uint32_t n = 5; n < 8; n++) {
26613 for (size_t k = 1; k <= 40; k += 9) {
26614 for (uint32_t m = 1; m <= 1; m++) {
26615 GemmMicrokernelTester()
26616 .mr(1)
26617 .nr(4)
26618 .kr(8)
26619 .sr(1)
26620 .m(m)
26621 .n(n)
26622 .k(k)
26623 .iterations(1)
26624 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26625 }
26626 }
26627 }
26628 }
26629
26630 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4) {
26631 TEST_REQUIRES_X86_SSE41;
26632 for (uint32_t n = 8; n <= 12; n += 4) {
26633 for (size_t k = 1; k <= 40; k += 9) {
26634 GemmMicrokernelTester()
26635 .mr(1)
26636 .nr(4)
26637 .kr(8)
26638 .sr(1)
26639 .m(1)
26640 .n(4)
26641 .k(k)
26642 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26643 }
26644 }
26645 }
26646
26647 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_strided_cn) {
26648 TEST_REQUIRES_X86_SSE41;
26649 for (uint32_t n = 8; n <= 12; n += 4) {
26650 for (size_t k = 1; k <= 40; k += 9) {
26651 GemmMicrokernelTester()
26652 .mr(1)
26653 .nr(4)
26654 .kr(8)
26655 .sr(1)
26656 .m(1)
26657 .n(n)
26658 .k(k)
26659 .cn_stride(7)
26660 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26661 }
26662 }
26663 }
26664
26665 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_subtile) {
26666 TEST_REQUIRES_X86_SSE41;
26667 for (uint32_t n = 8; n <= 12; n += 4) {
26668 for (size_t k = 1; k <= 40; k += 9) {
26669 for (uint32_t m = 1; m <= 1; m++) {
26670 GemmMicrokernelTester()
26671 .mr(1)
26672 .nr(4)
26673 .kr(8)
26674 .sr(1)
26675 .m(m)
26676 .n(n)
26677 .k(k)
26678 .iterations(1)
26679 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26680 }
26681 }
26682 }
26683 }
26684
26685 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, small_kernel) {
26686 TEST_REQUIRES_X86_SSE41;
26687 for (size_t k = 1; k <= 40; k += 9) {
26688 GemmMicrokernelTester()
26689 .mr(1)
26690 .nr(4)
26691 .kr(8)
26692 .sr(1)
26693 .m(1)
26694 .n(4)
26695 .k(k)
26696 .ks(3)
26697 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26698 }
26699 }
26700
26701 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, small_kernel_subtile) {
26702 TEST_REQUIRES_X86_SSE41;
26703 for (size_t k = 1; k <= 40; k += 9) {
26704 for (uint32_t m = 1; m <= 1; m++) {
26705 for (uint32_t n = 1; n <= 4; n++) {
26706 GemmMicrokernelTester()
26707 .mr(1)
26708 .nr(4)
26709 .kr(8)
26710 .sr(1)
26711 .m(m)
26712 .n(n)
26713 .k(k)
26714 .ks(3)
26715 .iterations(1)
26716 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26717 }
26718 }
26719 }
26720 }
26721
26722 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_small_kernel) {
26723 TEST_REQUIRES_X86_SSE41;
26724 for (uint32_t n = 5; n < 8; n++) {
26725 for (size_t k = 1; k <= 40; k += 9) {
26726 GemmMicrokernelTester()
26727 .mr(1)
26728 .nr(4)
26729 .kr(8)
26730 .sr(1)
26731 .m(1)
26732 .n(4)
26733 .k(k)
26734 .ks(3)
26735 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26736 }
26737 }
26738 }
26739
26740 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_small_kernel) {
26741 TEST_REQUIRES_X86_SSE41;
26742 for (uint32_t n = 8; n <= 12; n += 4) {
26743 for (size_t k = 1; k <= 40; k += 9) {
26744 GemmMicrokernelTester()
26745 .mr(1)
26746 .nr(4)
26747 .kr(8)
26748 .sr(1)
26749 .m(1)
26750 .n(4)
26751 .k(k)
26752 .ks(3)
26753 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26754 }
26755 }
26756 }
26757
26758 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cm_subtile) {
26759 TEST_REQUIRES_X86_SSE41;
26760 for (size_t k = 1; k <= 40; k += 9) {
26761 for (uint32_t m = 1; m <= 1; m++) {
26762 for (uint32_t n = 1; n <= 4; n++) {
26763 GemmMicrokernelTester()
26764 .mr(1)
26765 .nr(4)
26766 .kr(8)
26767 .sr(1)
26768 .m(m)
26769 .n(n)
26770 .k(k)
26771 .cm_stride(7)
26772 .iterations(1)
26773 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26774 }
26775 }
26776 }
26777 }
26778
26779 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, a_offset) {
26780 TEST_REQUIRES_X86_SSE41;
26781 for (size_t k = 1; k <= 40; k += 9) {
26782 GemmMicrokernelTester()
26783 .mr(1)
26784 .nr(4)
26785 .kr(8)
26786 .sr(1)
26787 .m(1)
26788 .n(4)
26789 .k(k)
26790 .ks(3)
26791 .a_offset(43)
26792 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26793 }
26794 }
26795
26796 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, zero) {
26797 TEST_REQUIRES_X86_SSE41;
26798 for (uint32_t mz = 0; mz < 1; mz++) {
26799 for (size_t k = 1; k <= 40; k += 9) {
26800 GemmMicrokernelTester()
26801 .mr(1)
26802 .nr(4)
26803 .kr(8)
26804 .sr(1)
26805 .m(1)
26806 .n(4)
26807 .k(k)
26808 .ks(3)
26809 .a_offset(43)
26810 .zero_index(mz)
26811 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26812 }
26813 }
26814 }
26815
26816 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, qmin) {
26817 TEST_REQUIRES_X86_SSE41;
26818 GemmMicrokernelTester()
26819 .mr(1)
26820 .nr(4)
26821 .kr(8)
26822 .sr(1)
26823 .m(1)
26824 .n(4)
26825 .k(8)
26826 .qmin(128)
26827 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26828 }
26829
26830 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, qmax) {
26831 TEST_REQUIRES_X86_SSE41;
26832 GemmMicrokernelTester()
26833 .mr(1)
26834 .nr(4)
26835 .kr(8)
26836 .sr(1)
26837 .m(1)
26838 .n(4)
26839 .k(8)
26840 .qmax(128)
26841 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26842 }
26843
26844 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cm) {
26845 TEST_REQUIRES_X86_SSE41;
26846 GemmMicrokernelTester()
26847 .mr(1)
26848 .nr(4)
26849 .kr(8)
26850 .sr(1)
26851 .m(1)
26852 .n(4)
26853 .k(8)
26854 .cm_stride(7)
26855 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26856 }
26857
26858 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, no_a_zero_point) {
26859 TEST_REQUIRES_X86_SSE41;
26860 for (size_t k = 1; k <= 40; k += 9) {
26861 GemmMicrokernelTester()
26862 .mr(1)
26863 .nr(4)
26864 .kr(8)
26865 .sr(1)
26866 .m(1)
26867 .n(4)
26868 .k(k)
26869 .a_zero_point(0)
26870 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26871 }
26872 }
26873
26874 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, no_b_zero_point) {
26875 TEST_REQUIRES_X86_SSE41;
26876 for (size_t k = 1; k <= 40; k += 9) {
26877 GemmMicrokernelTester()
26878 .mr(1)
26879 .nr(4)
26880 .kr(8)
26881 .sr(1)
26882 .m(1)
26883 .n(4)
26884 .k(k)
26885 .b_zero_point(0)
26886 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26887 }
26888 }
26889
26890 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, no_zero_point) {
26891 TEST_REQUIRES_X86_SSE41;
26892 for (size_t k = 1; k <= 40; k += 9) {
26893 GemmMicrokernelTester()
26894 .mr(1)
26895 .nr(4)
26896 .kr(8)
26897 .sr(1)
26898 .m(1)
26899 .n(4)
26900 .k(k)
26901 .a_zero_point(0)
26902 .b_zero_point(0)
26903 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26904 }
26905 }
26906#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26907
26908
26909#if XNN_ARCH_X86 || XNN_ARCH_X86_64
26910 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8) {
26911 TEST_REQUIRES_X86_SSE41;
26912 GemmMicrokernelTester()
26913 .mr(2)
26914 .nr(4)
26915 .kr(8)
26916 .sr(1)
26917 .m(2)
26918 .n(4)
26919 .k(8)
26920 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26921 }
26922
26923 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cn) {
26924 TEST_REQUIRES_X86_SSE41;
26925 GemmMicrokernelTester()
26926 .mr(2)
26927 .nr(4)
26928 .kr(8)
26929 .sr(1)
26930 .m(2)
26931 .n(4)
26932 .k(8)
26933 .cn_stride(7)
26934 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26935 }
26936
26937 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile) {
26938 TEST_REQUIRES_X86_SSE41;
26939 for (uint32_t m = 1; m <= 2; m++) {
26940 for (uint32_t n = 1; n <= 4; n++) {
26941 GemmMicrokernelTester()
26942 .mr(2)
26943 .nr(4)
26944 .kr(8)
26945 .sr(1)
26946 .m(m)
26947 .n(n)
26948 .k(8)
26949 .iterations(1)
26950 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26951 }
26952 }
26953 }
26954
26955 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile_m) {
26956 TEST_REQUIRES_X86_SSE41;
26957 for (uint32_t m = 1; m <= 2; m++) {
26958 GemmMicrokernelTester()
26959 .mr(2)
26960 .nr(4)
26961 .kr(8)
26962 .sr(1)
26963 .m(m)
26964 .n(4)
26965 .k(8)
26966 .iterations(1)
26967 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26968 }
26969 }
26970
26971 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile_n) {
26972 TEST_REQUIRES_X86_SSE41;
26973 for (uint32_t n = 1; n <= 4; n++) {
26974 GemmMicrokernelTester()
26975 .mr(2)
26976 .nr(4)
26977 .kr(8)
26978 .sr(1)
26979 .m(2)
26980 .n(n)
26981 .k(8)
26982 .iterations(1)
26983 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26984 }
26985 }
26986
26987 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_lt_8) {
26988 TEST_REQUIRES_X86_SSE41;
26989 for (size_t k = 1; k < 8; k++) {
26990 GemmMicrokernelTester()
26991 .mr(2)
26992 .nr(4)
26993 .kr(8)
26994 .sr(1)
26995 .m(2)
26996 .n(4)
26997 .k(k)
26998 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
26999 }
27000 }
27001
27002 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_lt_8_subtile) {
27003 TEST_REQUIRES_X86_SSE41;
27004 for (size_t k = 1; k < 8; k++) {
27005 for (uint32_t m = 1; m <= 2; m++) {
27006 for (uint32_t n = 1; n <= 4; n++) {
27007 GemmMicrokernelTester()
27008 .mr(2)
27009 .nr(4)
27010 .kr(8)
27011 .sr(1)
27012 .m(m)
27013 .n(n)
27014 .k(k)
27015 .iterations(1)
27016 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27017 }
27018 }
27019 }
27020 }
27021
27022 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_gt_8) {
27023 TEST_REQUIRES_X86_SSE41;
27024 for (size_t k = 9; k < 16; k++) {
27025 GemmMicrokernelTester()
27026 .mr(2)
27027 .nr(4)
27028 .kr(8)
27029 .sr(1)
27030 .m(2)
27031 .n(4)
27032 .k(k)
27033 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27034 }
27035 }
27036
27037 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_gt_8_subtile) {
27038 TEST_REQUIRES_X86_SSE41;
27039 for (size_t k = 9; k < 16; k++) {
27040 for (uint32_t m = 1; m <= 2; m++) {
27041 for (uint32_t n = 1; n <= 4; n++) {
27042 GemmMicrokernelTester()
27043 .mr(2)
27044 .nr(4)
27045 .kr(8)
27046 .sr(1)
27047 .m(m)
27048 .n(n)
27049 .k(k)
27050 .iterations(1)
27051 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27052 }
27053 }
27054 }
27055 }
27056
27057 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_div_8) {
27058 TEST_REQUIRES_X86_SSE41;
27059 for (size_t k = 16; k <= 80; k += 8) {
27060 GemmMicrokernelTester()
27061 .mr(2)
27062 .nr(4)
27063 .kr(8)
27064 .sr(1)
27065 .m(2)
27066 .n(4)
27067 .k(k)
27068 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27069 }
27070 }
27071
27072 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_div_8_subtile) {
27073 TEST_REQUIRES_X86_SSE41;
27074 for (size_t k = 16; k <= 80; k += 8) {
27075 for (uint32_t m = 1; m <= 2; m++) {
27076 for (uint32_t n = 1; n <= 4; n++) {
27077 GemmMicrokernelTester()
27078 .mr(2)
27079 .nr(4)
27080 .kr(8)
27081 .sr(1)
27082 .m(m)
27083 .n(n)
27084 .k(k)
27085 .iterations(1)
27086 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27087 }
27088 }
27089 }
27090 }
27091
27092 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4) {
27093 TEST_REQUIRES_X86_SSE41;
27094 for (uint32_t n = 5; n < 8; n++) {
27095 for (size_t k = 1; k <= 40; k += 9) {
27096 GemmMicrokernelTester()
27097 .mr(2)
27098 .nr(4)
27099 .kr(8)
27100 .sr(1)
27101 .m(2)
27102 .n(4)
27103 .k(k)
27104 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27105 }
27106 }
27107 }
27108
27109 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_strided_cn) {
27110 TEST_REQUIRES_X86_SSE41;
27111 for (uint32_t n = 5; n < 8; n++) {
27112 for (size_t k = 1; k <= 40; k += 9) {
27113 GemmMicrokernelTester()
27114 .mr(2)
27115 .nr(4)
27116 .kr(8)
27117 .sr(1)
27118 .m(2)
27119 .n(4)
27120 .k(k)
27121 .cn_stride(7)
27122 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27123 }
27124 }
27125 }
27126
27127 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_subtile) {
27128 TEST_REQUIRES_X86_SSE41;
27129 for (uint32_t n = 5; n < 8; n++) {
27130 for (size_t k = 1; k <= 40; k += 9) {
27131 for (uint32_t m = 1; m <= 2; m++) {
27132 GemmMicrokernelTester()
27133 .mr(2)
27134 .nr(4)
27135 .kr(8)
27136 .sr(1)
27137 .m(m)
27138 .n(n)
27139 .k(k)
27140 .iterations(1)
27141 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27142 }
27143 }
27144 }
27145 }
27146
27147 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4) {
27148 TEST_REQUIRES_X86_SSE41;
27149 for (uint32_t n = 8; n <= 12; n += 4) {
27150 for (size_t k = 1; k <= 40; k += 9) {
27151 GemmMicrokernelTester()
27152 .mr(2)
27153 .nr(4)
27154 .kr(8)
27155 .sr(1)
27156 .m(2)
27157 .n(4)
27158 .k(k)
27159 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27160 }
27161 }
27162 }
27163
27164 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_strided_cn) {
27165 TEST_REQUIRES_X86_SSE41;
27166 for (uint32_t n = 8; n <= 12; n += 4) {
27167 for (size_t k = 1; k <= 40; k += 9) {
27168 GemmMicrokernelTester()
27169 .mr(2)
27170 .nr(4)
27171 .kr(8)
27172 .sr(1)
27173 .m(2)
27174 .n(n)
27175 .k(k)
27176 .cn_stride(7)
27177 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27178 }
27179 }
27180 }
27181
27182 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_subtile) {
27183 TEST_REQUIRES_X86_SSE41;
27184 for (uint32_t n = 8; n <= 12; n += 4) {
27185 for (size_t k = 1; k <= 40; k += 9) {
27186 for (uint32_t m = 1; m <= 2; m++) {
27187 GemmMicrokernelTester()
27188 .mr(2)
27189 .nr(4)
27190 .kr(8)
27191 .sr(1)
27192 .m(m)
27193 .n(n)
27194 .k(k)
27195 .iterations(1)
27196 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27197 }
27198 }
27199 }
27200 }
27201
27202 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, small_kernel) {
27203 TEST_REQUIRES_X86_SSE41;
27204 for (size_t k = 1; k <= 40; k += 9) {
27205 GemmMicrokernelTester()
27206 .mr(2)
27207 .nr(4)
27208 .kr(8)
27209 .sr(1)
27210 .m(2)
27211 .n(4)
27212 .k(k)
27213 .ks(3)
27214 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27215 }
27216 }
27217
27218 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, small_kernel_subtile) {
27219 TEST_REQUIRES_X86_SSE41;
27220 for (size_t k = 1; k <= 40; k += 9) {
27221 for (uint32_t m = 1; m <= 2; m++) {
27222 for (uint32_t n = 1; n <= 4; n++) {
27223 GemmMicrokernelTester()
27224 .mr(2)
27225 .nr(4)
27226 .kr(8)
27227 .sr(1)
27228 .m(m)
27229 .n(n)
27230 .k(k)
27231 .ks(3)
27232 .iterations(1)
27233 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27234 }
27235 }
27236 }
27237 }
27238
27239 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_small_kernel) {
27240 TEST_REQUIRES_X86_SSE41;
27241 for (uint32_t n = 5; n < 8; n++) {
27242 for (size_t k = 1; k <= 40; k += 9) {
27243 GemmMicrokernelTester()
27244 .mr(2)
27245 .nr(4)
27246 .kr(8)
27247 .sr(1)
27248 .m(2)
27249 .n(4)
27250 .k(k)
27251 .ks(3)
27252 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27253 }
27254 }
27255 }
27256
27257 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_small_kernel) {
27258 TEST_REQUIRES_X86_SSE41;
27259 for (uint32_t n = 8; n <= 12; n += 4) {
27260 for (size_t k = 1; k <= 40; k += 9) {
27261 GemmMicrokernelTester()
27262 .mr(2)
27263 .nr(4)
27264 .kr(8)
27265 .sr(1)
27266 .m(2)
27267 .n(4)
27268 .k(k)
27269 .ks(3)
27270 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27271 }
27272 }
27273 }
27274
27275 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cm_subtile) {
27276 TEST_REQUIRES_X86_SSE41;
27277 for (size_t k = 1; k <= 40; k += 9) {
27278 for (uint32_t m = 1; m <= 2; m++) {
27279 for (uint32_t n = 1; n <= 4; n++) {
27280 GemmMicrokernelTester()
27281 .mr(2)
27282 .nr(4)
27283 .kr(8)
27284 .sr(1)
27285 .m(m)
27286 .n(n)
27287 .k(k)
27288 .cm_stride(7)
27289 .iterations(1)
27290 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27291 }
27292 }
27293 }
27294 }
27295
27296 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, a_offset) {
27297 TEST_REQUIRES_X86_SSE41;
27298 for (size_t k = 1; k <= 40; k += 9) {
27299 GemmMicrokernelTester()
27300 .mr(2)
27301 .nr(4)
27302 .kr(8)
27303 .sr(1)
27304 .m(2)
27305 .n(4)
27306 .k(k)
27307 .ks(3)
27308 .a_offset(83)
27309 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27310 }
27311 }
27312
27313 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, zero) {
27314 TEST_REQUIRES_X86_SSE41;
27315 for (uint32_t mz = 0; mz < 2; mz++) {
27316 for (size_t k = 1; k <= 40; k += 9) {
27317 GemmMicrokernelTester()
27318 .mr(2)
27319 .nr(4)
27320 .kr(8)
27321 .sr(1)
27322 .m(2)
27323 .n(4)
27324 .k(k)
27325 .ks(3)
27326 .a_offset(83)
27327 .zero_index(mz)
27328 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27329 }
27330 }
27331 }
27332
27333 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, qmin) {
27334 TEST_REQUIRES_X86_SSE41;
27335 GemmMicrokernelTester()
27336 .mr(2)
27337 .nr(4)
27338 .kr(8)
27339 .sr(1)
27340 .m(2)
27341 .n(4)
27342 .k(8)
27343 .qmin(128)
27344 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27345 }
27346
27347 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, qmax) {
27348 TEST_REQUIRES_X86_SSE41;
27349 GemmMicrokernelTester()
27350 .mr(2)
27351 .nr(4)
27352 .kr(8)
27353 .sr(1)
27354 .m(2)
27355 .n(4)
27356 .k(8)
27357 .qmax(128)
27358 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27359 }
27360
27361 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cm) {
27362 TEST_REQUIRES_X86_SSE41;
27363 GemmMicrokernelTester()
27364 .mr(2)
27365 .nr(4)
27366 .kr(8)
27367 .sr(1)
27368 .m(2)
27369 .n(4)
27370 .k(8)
27371 .cm_stride(7)
27372 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27373 }
27374
27375 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, no_a_zero_point) {
27376 TEST_REQUIRES_X86_SSE41;
27377 for (size_t k = 1; k <= 40; k += 9) {
27378 GemmMicrokernelTester()
27379 .mr(2)
27380 .nr(4)
27381 .kr(8)
27382 .sr(1)
27383 .m(2)
27384 .n(4)
27385 .k(k)
27386 .a_zero_point(0)
27387 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27388 }
27389 }
27390
27391 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, no_b_zero_point) {
27392 TEST_REQUIRES_X86_SSE41;
27393 for (size_t k = 1; k <= 40; k += 9) {
27394 GemmMicrokernelTester()
27395 .mr(2)
27396 .nr(4)
27397 .kr(8)
27398 .sr(1)
27399 .m(2)
27400 .n(4)
27401 .k(k)
27402 .b_zero_point(0)
27403 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27404 }
27405 }
27406
27407 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, no_zero_point) {
27408 TEST_REQUIRES_X86_SSE41;
27409 for (size_t k = 1; k <= 40; k += 9) {
27410 GemmMicrokernelTester()
27411 .mr(2)
27412 .nr(4)
27413 .kr(8)
27414 .sr(1)
27415 .m(2)
27416 .n(4)
27417 .k(k)
27418 .a_zero_point(0)
27419 .b_zero_point(0)
27420 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27421 }
27422 }
27423#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
27424
27425
27426#if XNN_ARCH_X86 || XNN_ARCH_X86_64
27427 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8) {
27428 TEST_REQUIRES_X86_SSE41;
27429 GemmMicrokernelTester()
27430 .mr(3)
27431 .nr(4)
27432 .kr(8)
27433 .sr(1)
27434 .m(3)
27435 .n(4)
27436 .k(8)
27437 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27438 }
27439
27440 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, strided_cn) {
27441 TEST_REQUIRES_X86_SSE41;
27442 GemmMicrokernelTester()
27443 .mr(3)
27444 .nr(4)
27445 .kr(8)
27446 .sr(1)
27447 .m(3)
27448 .n(4)
27449 .k(8)
27450 .cn_stride(7)
27451 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27452 }
27453
27454 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_subtile) {
27455 TEST_REQUIRES_X86_SSE41;
27456 for (uint32_t m = 1; m <= 3; m++) {
27457 for (uint32_t n = 1; n <= 4; n++) {
27458 GemmMicrokernelTester()
27459 .mr(3)
27460 .nr(4)
27461 .kr(8)
27462 .sr(1)
27463 .m(m)
27464 .n(n)
27465 .k(8)
27466 .iterations(1)
27467 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27468 }
27469 }
27470 }
27471
27472 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_subtile_m) {
27473 TEST_REQUIRES_X86_SSE41;
27474 for (uint32_t m = 1; m <= 3; m++) {
27475 GemmMicrokernelTester()
27476 .mr(3)
27477 .nr(4)
27478 .kr(8)
27479 .sr(1)
27480 .m(m)
27481 .n(4)
27482 .k(8)
27483 .iterations(1)
27484 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27485 }
27486 }
27487
27488 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_subtile_n) {
27489 TEST_REQUIRES_X86_SSE41;
27490 for (uint32_t n = 1; n <= 4; n++) {
27491 GemmMicrokernelTester()
27492 .mr(3)
27493 .nr(4)
27494 .kr(8)
27495 .sr(1)
27496 .m(3)
27497 .n(n)
27498 .k(8)
27499 .iterations(1)
27500 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27501 }
27502 }
27503
27504 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_lt_8) {
27505 TEST_REQUIRES_X86_SSE41;
27506 for (size_t k = 1; k < 8; k++) {
27507 GemmMicrokernelTester()
27508 .mr(3)
27509 .nr(4)
27510 .kr(8)
27511 .sr(1)
27512 .m(3)
27513 .n(4)
27514 .k(k)
27515 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27516 }
27517 }
27518
27519 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_lt_8_subtile) {
27520 TEST_REQUIRES_X86_SSE41;
27521 for (size_t k = 1; k < 8; k++) {
27522 for (uint32_t m = 1; m <= 3; m++) {
27523 for (uint32_t n = 1; n <= 4; n++) {
27524 GemmMicrokernelTester()
27525 .mr(3)
27526 .nr(4)
27527 .kr(8)
27528 .sr(1)
27529 .m(m)
27530 .n(n)
27531 .k(k)
27532 .iterations(1)
27533 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27534 }
27535 }
27536 }
27537 }
27538
27539 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_gt_8) {
27540 TEST_REQUIRES_X86_SSE41;
27541 for (size_t k = 9; k < 16; k++) {
27542 GemmMicrokernelTester()
27543 .mr(3)
27544 .nr(4)
27545 .kr(8)
27546 .sr(1)
27547 .m(3)
27548 .n(4)
27549 .k(k)
27550 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27551 }
27552 }
27553
27554 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_gt_8_subtile) {
27555 TEST_REQUIRES_X86_SSE41;
27556 for (size_t k = 9; k < 16; k++) {
27557 for (uint32_t m = 1; m <= 3; m++) {
27558 for (uint32_t n = 1; n <= 4; n++) {
27559 GemmMicrokernelTester()
27560 .mr(3)
27561 .nr(4)
27562 .kr(8)
27563 .sr(1)
27564 .m(m)
27565 .n(n)
27566 .k(k)
27567 .iterations(1)
27568 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27569 }
27570 }
27571 }
27572 }
27573
27574 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_div_8) {
27575 TEST_REQUIRES_X86_SSE41;
27576 for (size_t k = 16; k <= 80; k += 8) {
27577 GemmMicrokernelTester()
27578 .mr(3)
27579 .nr(4)
27580 .kr(8)
27581 .sr(1)
27582 .m(3)
27583 .n(4)
27584 .k(k)
27585 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27586 }
27587 }
27588
27589 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_div_8_subtile) {
27590 TEST_REQUIRES_X86_SSE41;
27591 for (size_t k = 16; k <= 80; k += 8) {
27592 for (uint32_t m = 1; m <= 3; m++) {
27593 for (uint32_t n = 1; n <= 4; n++) {
27594 GemmMicrokernelTester()
27595 .mr(3)
27596 .nr(4)
27597 .kr(8)
27598 .sr(1)
27599 .m(m)
27600 .n(n)
27601 .k(k)
27602 .iterations(1)
27603 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27604 }
27605 }
27606 }
27607 }
27608
27609 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4) {
27610 TEST_REQUIRES_X86_SSE41;
27611 for (uint32_t n = 5; n < 8; n++) {
27612 for (size_t k = 1; k <= 40; k += 9) {
27613 GemmMicrokernelTester()
27614 .mr(3)
27615 .nr(4)
27616 .kr(8)
27617 .sr(1)
27618 .m(3)
27619 .n(4)
27620 .k(k)
27621 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27622 }
27623 }
27624 }
27625
27626 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4_strided_cn) {
27627 TEST_REQUIRES_X86_SSE41;
27628 for (uint32_t n = 5; n < 8; n++) {
27629 for (size_t k = 1; k <= 40; k += 9) {
27630 GemmMicrokernelTester()
27631 .mr(3)
27632 .nr(4)
27633 .kr(8)
27634 .sr(1)
27635 .m(3)
27636 .n(4)
27637 .k(k)
27638 .cn_stride(7)
27639 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27640 }
27641 }
27642 }
27643
27644 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4_subtile) {
27645 TEST_REQUIRES_X86_SSE41;
27646 for (uint32_t n = 5; n < 8; n++) {
27647 for (size_t k = 1; k <= 40; k += 9) {
27648 for (uint32_t m = 1; m <= 3; m++) {
27649 GemmMicrokernelTester()
27650 .mr(3)
27651 .nr(4)
27652 .kr(8)
27653 .sr(1)
27654 .m(m)
27655 .n(n)
27656 .k(k)
27657 .iterations(1)
27658 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27659 }
27660 }
27661 }
27662 }
27663
27664 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4) {
27665 TEST_REQUIRES_X86_SSE41;
27666 for (uint32_t n = 8; n <= 12; n += 4) {
27667 for (size_t k = 1; k <= 40; k += 9) {
27668 GemmMicrokernelTester()
27669 .mr(3)
27670 .nr(4)
27671 .kr(8)
27672 .sr(1)
27673 .m(3)
27674 .n(4)
27675 .k(k)
27676 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27677 }
27678 }
27679 }
27680
27681 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4_strided_cn) {
27682 TEST_REQUIRES_X86_SSE41;
27683 for (uint32_t n = 8; n <= 12; n += 4) {
27684 for (size_t k = 1; k <= 40; k += 9) {
27685 GemmMicrokernelTester()
27686 .mr(3)
27687 .nr(4)
27688 .kr(8)
27689 .sr(1)
27690 .m(3)
27691 .n(n)
27692 .k(k)
27693 .cn_stride(7)
27694 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27695 }
27696 }
27697 }
27698
27699 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4_subtile) {
27700 TEST_REQUIRES_X86_SSE41;
27701 for (uint32_t n = 8; n <= 12; n += 4) {
27702 for (size_t k = 1; k <= 40; k += 9) {
27703 for (uint32_t m = 1; m <= 3; m++) {
27704 GemmMicrokernelTester()
27705 .mr(3)
27706 .nr(4)
27707 .kr(8)
27708 .sr(1)
27709 .m(m)
27710 .n(n)
27711 .k(k)
27712 .iterations(1)
27713 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27714 }
27715 }
27716 }
27717 }
27718
27719 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, small_kernel) {
27720 TEST_REQUIRES_X86_SSE41;
27721 for (size_t k = 1; k <= 40; k += 9) {
27722 GemmMicrokernelTester()
27723 .mr(3)
27724 .nr(4)
27725 .kr(8)
27726 .sr(1)
27727 .m(3)
27728 .n(4)
27729 .k(k)
27730 .ks(3)
27731 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27732 }
27733 }
27734
27735 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, small_kernel_subtile) {
27736 TEST_REQUIRES_X86_SSE41;
27737 for (size_t k = 1; k <= 40; k += 9) {
27738 for (uint32_t m = 1; m <= 3; m++) {
27739 for (uint32_t n = 1; n <= 4; n++) {
27740 GemmMicrokernelTester()
27741 .mr(3)
27742 .nr(4)
27743 .kr(8)
27744 .sr(1)
27745 .m(m)
27746 .n(n)
27747 .k(k)
27748 .ks(3)
27749 .iterations(1)
27750 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27751 }
27752 }
27753 }
27754 }
27755
27756 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4_small_kernel) {
27757 TEST_REQUIRES_X86_SSE41;
27758 for (uint32_t n = 5; n < 8; n++) {
27759 for (size_t k = 1; k <= 40; k += 9) {
27760 GemmMicrokernelTester()
27761 .mr(3)
27762 .nr(4)
27763 .kr(8)
27764 .sr(1)
27765 .m(3)
27766 .n(4)
27767 .k(k)
27768 .ks(3)
27769 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27770 }
27771 }
27772 }
27773
27774 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4_small_kernel) {
27775 TEST_REQUIRES_X86_SSE41;
27776 for (uint32_t n = 8; n <= 12; n += 4) {
27777 for (size_t k = 1; k <= 40; k += 9) {
27778 GemmMicrokernelTester()
27779 .mr(3)
27780 .nr(4)
27781 .kr(8)
27782 .sr(1)
27783 .m(3)
27784 .n(4)
27785 .k(k)
27786 .ks(3)
27787 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27788 }
27789 }
27790 }
27791
27792 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, strided_cm_subtile) {
27793 TEST_REQUIRES_X86_SSE41;
27794 for (size_t k = 1; k <= 40; k += 9) {
27795 for (uint32_t m = 1; m <= 3; m++) {
27796 for (uint32_t n = 1; n <= 4; n++) {
27797 GemmMicrokernelTester()
27798 .mr(3)
27799 .nr(4)
27800 .kr(8)
27801 .sr(1)
27802 .m(m)
27803 .n(n)
27804 .k(k)
27805 .cm_stride(7)
27806 .iterations(1)
27807 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27808 }
27809 }
27810 }
27811 }
27812
27813 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, a_offset) {
27814 TEST_REQUIRES_X86_SSE41;
27815 for (size_t k = 1; k <= 40; k += 9) {
27816 GemmMicrokernelTester()
27817 .mr(3)
27818 .nr(4)
27819 .kr(8)
27820 .sr(1)
27821 .m(3)
27822 .n(4)
27823 .k(k)
27824 .ks(3)
27825 .a_offset(127)
27826 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27827 }
27828 }
27829
27830 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, zero) {
27831 TEST_REQUIRES_X86_SSE41;
27832 for (uint32_t mz = 0; mz < 3; mz++) {
27833 for (size_t k = 1; k <= 40; k += 9) {
27834 GemmMicrokernelTester()
27835 .mr(3)
27836 .nr(4)
27837 .kr(8)
27838 .sr(1)
27839 .m(3)
27840 .n(4)
27841 .k(k)
27842 .ks(3)
27843 .a_offset(127)
27844 .zero_index(mz)
27845 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27846 }
27847 }
27848 }
27849
27850 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, qmin) {
27851 TEST_REQUIRES_X86_SSE41;
27852 GemmMicrokernelTester()
27853 .mr(3)
27854 .nr(4)
27855 .kr(8)
27856 .sr(1)
27857 .m(3)
27858 .n(4)
27859 .k(8)
27860 .qmin(128)
27861 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27862 }
27863
27864 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, qmax) {
27865 TEST_REQUIRES_X86_SSE41;
27866 GemmMicrokernelTester()
27867 .mr(3)
27868 .nr(4)
27869 .kr(8)
27870 .sr(1)
27871 .m(3)
27872 .n(4)
27873 .k(8)
27874 .qmax(128)
27875 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27876 }
27877
27878 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, strided_cm) {
27879 TEST_REQUIRES_X86_SSE41;
27880 GemmMicrokernelTester()
27881 .mr(3)
27882 .nr(4)
27883 .kr(8)
27884 .sr(1)
27885 .m(3)
27886 .n(4)
27887 .k(8)
27888 .cm_stride(7)
27889 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27890 }
27891
27892 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, no_a_zero_point) {
27893 TEST_REQUIRES_X86_SSE41;
27894 for (size_t k = 1; k <= 40; k += 9) {
27895 GemmMicrokernelTester()
27896 .mr(3)
27897 .nr(4)
27898 .kr(8)
27899 .sr(1)
27900 .m(3)
27901 .n(4)
27902 .k(k)
27903 .a_zero_point(0)
27904 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27905 }
27906 }
27907
27908 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, no_b_zero_point) {
27909 TEST_REQUIRES_X86_SSE41;
27910 for (size_t k = 1; k <= 40; k += 9) {
27911 GemmMicrokernelTester()
27912 .mr(3)
27913 .nr(4)
27914 .kr(8)
27915 .sr(1)
27916 .m(3)
27917 .n(4)
27918 .k(k)
27919 .b_zero_point(0)
27920 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27921 }
27922 }
27923
27924 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, no_zero_point) {
27925 TEST_REQUIRES_X86_SSE41;
27926 for (size_t k = 1; k <= 40; k += 9) {
27927 GemmMicrokernelTester()
27928 .mr(3)
27929 .nr(4)
27930 .kr(8)
27931 .sr(1)
27932 .m(3)
27933 .n(4)
27934 .k(k)
27935 .a_zero_point(0)
27936 .b_zero_point(0)
27937 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27938 }
27939 }
27940#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
27941
27942
27943#if XNN_ARCH_X86 || XNN_ARCH_X86_64
27944 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8) {
27945 TEST_REQUIRES_X86_AVX;
27946 GemmMicrokernelTester()
27947 .mr(1)
27948 .nr(4)
27949 .kr(8)
27950 .sr(1)
27951 .m(1)
27952 .n(4)
27953 .k(8)
27954 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27955 }
27956
27957 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cn) {
27958 TEST_REQUIRES_X86_AVX;
27959 GemmMicrokernelTester()
27960 .mr(1)
27961 .nr(4)
27962 .kr(8)
27963 .sr(1)
27964 .m(1)
27965 .n(4)
27966 .k(8)
27967 .cn_stride(7)
27968 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27969 }
27970
27971 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile) {
27972 TEST_REQUIRES_X86_AVX;
27973 for (uint32_t m = 1; m <= 1; m++) {
27974 for (uint32_t n = 1; n <= 4; n++) {
27975 GemmMicrokernelTester()
27976 .mr(1)
27977 .nr(4)
27978 .kr(8)
27979 .sr(1)
27980 .m(m)
27981 .n(n)
27982 .k(8)
27983 .iterations(1)
27984 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
27985 }
27986 }
27987 }
27988
27989 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile_m) {
27990 TEST_REQUIRES_X86_AVX;
27991 for (uint32_t m = 1; m <= 1; m++) {
27992 GemmMicrokernelTester()
27993 .mr(1)
27994 .nr(4)
27995 .kr(8)
27996 .sr(1)
27997 .m(m)
27998 .n(4)
27999 .k(8)
28000 .iterations(1)
28001 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28002 }
28003 }
28004
28005 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile_n) {
28006 TEST_REQUIRES_X86_AVX;
28007 for (uint32_t n = 1; n <= 4; n++) {
28008 GemmMicrokernelTester()
28009 .mr(1)
28010 .nr(4)
28011 .kr(8)
28012 .sr(1)
28013 .m(1)
28014 .n(n)
28015 .k(8)
28016 .iterations(1)
28017 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28018 }
28019 }
28020
28021 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_lt_8) {
28022 TEST_REQUIRES_X86_AVX;
28023 for (size_t k = 1; k < 8; k++) {
28024 GemmMicrokernelTester()
28025 .mr(1)
28026 .nr(4)
28027 .kr(8)
28028 .sr(1)
28029 .m(1)
28030 .n(4)
28031 .k(k)
28032 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28033 }
28034 }
28035
28036 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_lt_8_subtile) {
28037 TEST_REQUIRES_X86_AVX;
28038 for (size_t k = 1; k < 8; k++) {
28039 for (uint32_t m = 1; m <= 1; m++) {
28040 for (uint32_t n = 1; n <= 4; n++) {
28041 GemmMicrokernelTester()
28042 .mr(1)
28043 .nr(4)
28044 .kr(8)
28045 .sr(1)
28046 .m(m)
28047 .n(n)
28048 .k(k)
28049 .iterations(1)
28050 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28051 }
28052 }
28053 }
28054 }
28055
28056 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_gt_8) {
28057 TEST_REQUIRES_X86_AVX;
28058 for (size_t k = 9; k < 16; k++) {
28059 GemmMicrokernelTester()
28060 .mr(1)
28061 .nr(4)
28062 .kr(8)
28063 .sr(1)
28064 .m(1)
28065 .n(4)
28066 .k(k)
28067 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28068 }
28069 }
28070
28071 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_gt_8_subtile) {
28072 TEST_REQUIRES_X86_AVX;
28073 for (size_t k = 9; k < 16; k++) {
28074 for (uint32_t m = 1; m <= 1; m++) {
28075 for (uint32_t n = 1; n <= 4; n++) {
28076 GemmMicrokernelTester()
28077 .mr(1)
28078 .nr(4)
28079 .kr(8)
28080 .sr(1)
28081 .m(m)
28082 .n(n)
28083 .k(k)
28084 .iterations(1)
28085 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28086 }
28087 }
28088 }
28089 }
28090
28091 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_div_8) {
28092 TEST_REQUIRES_X86_AVX;
28093 for (size_t k = 16; k <= 80; k += 8) {
28094 GemmMicrokernelTester()
28095 .mr(1)
28096 .nr(4)
28097 .kr(8)
28098 .sr(1)
28099 .m(1)
28100 .n(4)
28101 .k(k)
28102 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28103 }
28104 }
28105
28106 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_div_8_subtile) {
28107 TEST_REQUIRES_X86_AVX;
28108 for (size_t k = 16; k <= 80; k += 8) {
28109 for (uint32_t m = 1; m <= 1; m++) {
28110 for (uint32_t n = 1; n <= 4; n++) {
28111 GemmMicrokernelTester()
28112 .mr(1)
28113 .nr(4)
28114 .kr(8)
28115 .sr(1)
28116 .m(m)
28117 .n(n)
28118 .k(k)
28119 .iterations(1)
28120 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28121 }
28122 }
28123 }
28124 }
28125
28126 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4) {
28127 TEST_REQUIRES_X86_AVX;
28128 for (uint32_t n = 5; n < 8; n++) {
28129 for (size_t k = 1; k <= 40; k += 9) {
28130 GemmMicrokernelTester()
28131 .mr(1)
28132 .nr(4)
28133 .kr(8)
28134 .sr(1)
28135 .m(1)
28136 .n(4)
28137 .k(k)
28138 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28139 }
28140 }
28141 }
28142
28143 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_strided_cn) {
28144 TEST_REQUIRES_X86_AVX;
28145 for (uint32_t n = 5; n < 8; n++) {
28146 for (size_t k = 1; k <= 40; k += 9) {
28147 GemmMicrokernelTester()
28148 .mr(1)
28149 .nr(4)
28150 .kr(8)
28151 .sr(1)
28152 .m(1)
28153 .n(4)
28154 .k(k)
28155 .cn_stride(7)
28156 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28157 }
28158 }
28159 }
28160
28161 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_subtile) {
28162 TEST_REQUIRES_X86_AVX;
28163 for (uint32_t n = 5; n < 8; n++) {
28164 for (size_t k = 1; k <= 40; k += 9) {
28165 for (uint32_t m = 1; m <= 1; m++) {
28166 GemmMicrokernelTester()
28167 .mr(1)
28168 .nr(4)
28169 .kr(8)
28170 .sr(1)
28171 .m(m)
28172 .n(n)
28173 .k(k)
28174 .iterations(1)
28175 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28176 }
28177 }
28178 }
28179 }
28180
28181 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4) {
28182 TEST_REQUIRES_X86_AVX;
28183 for (uint32_t n = 8; n <= 12; n += 4) {
28184 for (size_t k = 1; k <= 40; k += 9) {
28185 GemmMicrokernelTester()
28186 .mr(1)
28187 .nr(4)
28188 .kr(8)
28189 .sr(1)
28190 .m(1)
28191 .n(4)
28192 .k(k)
28193 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28194 }
28195 }
28196 }
28197
28198 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_strided_cn) {
28199 TEST_REQUIRES_X86_AVX;
28200 for (uint32_t n = 8; n <= 12; n += 4) {
28201 for (size_t k = 1; k <= 40; k += 9) {
28202 GemmMicrokernelTester()
28203 .mr(1)
28204 .nr(4)
28205 .kr(8)
28206 .sr(1)
28207 .m(1)
28208 .n(n)
28209 .k(k)
28210 .cn_stride(7)
28211 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28212 }
28213 }
28214 }
28215
28216 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_subtile) {
28217 TEST_REQUIRES_X86_AVX;
28218 for (uint32_t n = 8; n <= 12; n += 4) {
28219 for (size_t k = 1; k <= 40; k += 9) {
28220 for (uint32_t m = 1; m <= 1; m++) {
28221 GemmMicrokernelTester()
28222 .mr(1)
28223 .nr(4)
28224 .kr(8)
28225 .sr(1)
28226 .m(m)
28227 .n(n)
28228 .k(k)
28229 .iterations(1)
28230 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28231 }
28232 }
28233 }
28234 }
28235
28236 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, small_kernel) {
28237 TEST_REQUIRES_X86_AVX;
28238 for (size_t k = 1; k <= 40; k += 9) {
28239 GemmMicrokernelTester()
28240 .mr(1)
28241 .nr(4)
28242 .kr(8)
28243 .sr(1)
28244 .m(1)
28245 .n(4)
28246 .k(k)
28247 .ks(3)
28248 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28249 }
28250 }
28251
28252 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, small_kernel_subtile) {
28253 TEST_REQUIRES_X86_AVX;
28254 for (size_t k = 1; k <= 40; k += 9) {
28255 for (uint32_t m = 1; m <= 1; m++) {
28256 for (uint32_t n = 1; n <= 4; n++) {
28257 GemmMicrokernelTester()
28258 .mr(1)
28259 .nr(4)
28260 .kr(8)
28261 .sr(1)
28262 .m(m)
28263 .n(n)
28264 .k(k)
28265 .ks(3)
28266 .iterations(1)
28267 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28268 }
28269 }
28270 }
28271 }
28272
28273 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_small_kernel) {
28274 TEST_REQUIRES_X86_AVX;
28275 for (uint32_t n = 5; n < 8; n++) {
28276 for (size_t k = 1; k <= 40; k += 9) {
28277 GemmMicrokernelTester()
28278 .mr(1)
28279 .nr(4)
28280 .kr(8)
28281 .sr(1)
28282 .m(1)
28283 .n(4)
28284 .k(k)
28285 .ks(3)
28286 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28287 }
28288 }
28289 }
28290
28291 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_small_kernel) {
28292 TEST_REQUIRES_X86_AVX;
28293 for (uint32_t n = 8; n <= 12; n += 4) {
28294 for (size_t k = 1; k <= 40; k += 9) {
28295 GemmMicrokernelTester()
28296 .mr(1)
28297 .nr(4)
28298 .kr(8)
28299 .sr(1)
28300 .m(1)
28301 .n(4)
28302 .k(k)
28303 .ks(3)
28304 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28305 }
28306 }
28307 }
28308
28309 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cm_subtile) {
28310 TEST_REQUIRES_X86_AVX;
28311 for (size_t k = 1; k <= 40; k += 9) {
28312 for (uint32_t m = 1; m <= 1; m++) {
28313 for (uint32_t n = 1; n <= 4; n++) {
28314 GemmMicrokernelTester()
28315 .mr(1)
28316 .nr(4)
28317 .kr(8)
28318 .sr(1)
28319 .m(m)
28320 .n(n)
28321 .k(k)
28322 .cm_stride(7)
28323 .iterations(1)
28324 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28325 }
28326 }
28327 }
28328 }
28329
28330 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, a_offset) {
28331 TEST_REQUIRES_X86_AVX;
28332 for (size_t k = 1; k <= 40; k += 9) {
28333 GemmMicrokernelTester()
28334 .mr(1)
28335 .nr(4)
28336 .kr(8)
28337 .sr(1)
28338 .m(1)
28339 .n(4)
28340 .k(k)
28341 .ks(3)
28342 .a_offset(43)
28343 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28344 }
28345 }
28346
28347 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, zero) {
28348 TEST_REQUIRES_X86_AVX;
28349 for (uint32_t mz = 0; mz < 1; mz++) {
28350 for (size_t k = 1; k <= 40; k += 9) {
28351 GemmMicrokernelTester()
28352 .mr(1)
28353 .nr(4)
28354 .kr(8)
28355 .sr(1)
28356 .m(1)
28357 .n(4)
28358 .k(k)
28359 .ks(3)
28360 .a_offset(43)
28361 .zero_index(mz)
28362 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28363 }
28364 }
28365 }
28366
28367 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, qmin) {
28368 TEST_REQUIRES_X86_AVX;
28369 GemmMicrokernelTester()
28370 .mr(1)
28371 .nr(4)
28372 .kr(8)
28373 .sr(1)
28374 .m(1)
28375 .n(4)
28376 .k(8)
28377 .qmin(128)
28378 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28379 }
28380
28381 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, qmax) {
28382 TEST_REQUIRES_X86_AVX;
28383 GemmMicrokernelTester()
28384 .mr(1)
28385 .nr(4)
28386 .kr(8)
28387 .sr(1)
28388 .m(1)
28389 .n(4)
28390 .k(8)
28391 .qmax(128)
28392 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28393 }
28394
28395 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cm) {
28396 TEST_REQUIRES_X86_AVX;
28397 GemmMicrokernelTester()
28398 .mr(1)
28399 .nr(4)
28400 .kr(8)
28401 .sr(1)
28402 .m(1)
28403 .n(4)
28404 .k(8)
28405 .cm_stride(7)
28406 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28407 }
28408
28409 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, no_a_zero_point) {
28410 TEST_REQUIRES_X86_AVX;
28411 for (size_t k = 1; k <= 40; k += 9) {
28412 GemmMicrokernelTester()
28413 .mr(1)
28414 .nr(4)
28415 .kr(8)
28416 .sr(1)
28417 .m(1)
28418 .n(4)
28419 .k(k)
28420 .a_zero_point(0)
28421 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28422 }
28423 }
28424
28425 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, no_b_zero_point) {
28426 TEST_REQUIRES_X86_AVX;
28427 for (size_t k = 1; k <= 40; k += 9) {
28428 GemmMicrokernelTester()
28429 .mr(1)
28430 .nr(4)
28431 .kr(8)
28432 .sr(1)
28433 .m(1)
28434 .n(4)
28435 .k(k)
28436 .b_zero_point(0)
28437 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28438 }
28439 }
28440
28441 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, no_zero_point) {
28442 TEST_REQUIRES_X86_AVX;
28443 for (size_t k = 1; k <= 40; k += 9) {
28444 GemmMicrokernelTester()
28445 .mr(1)
28446 .nr(4)
28447 .kr(8)
28448 .sr(1)
28449 .m(1)
28450 .n(4)
28451 .k(k)
28452 .a_zero_point(0)
28453 .b_zero_point(0)
28454 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28455 }
28456 }
28457#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
28458
28459
28460#if XNN_ARCH_X86 || XNN_ARCH_X86_64
28461 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8) {
28462 TEST_REQUIRES_X86_AVX;
28463 GemmMicrokernelTester()
28464 .mr(2)
28465 .nr(4)
28466 .kr(8)
28467 .sr(1)
28468 .m(2)
28469 .n(4)
28470 .k(8)
28471 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28472 }
28473
28474 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cn) {
28475 TEST_REQUIRES_X86_AVX;
28476 GemmMicrokernelTester()
28477 .mr(2)
28478 .nr(4)
28479 .kr(8)
28480 .sr(1)
28481 .m(2)
28482 .n(4)
28483 .k(8)
28484 .cn_stride(7)
28485 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28486 }
28487
28488 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile) {
28489 TEST_REQUIRES_X86_AVX;
28490 for (uint32_t m = 1; m <= 2; m++) {
28491 for (uint32_t n = 1; n <= 4; n++) {
28492 GemmMicrokernelTester()
28493 .mr(2)
28494 .nr(4)
28495 .kr(8)
28496 .sr(1)
28497 .m(m)
28498 .n(n)
28499 .k(8)
28500 .iterations(1)
28501 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28502 }
28503 }
28504 }
28505
28506 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile_m) {
28507 TEST_REQUIRES_X86_AVX;
28508 for (uint32_t m = 1; m <= 2; m++) {
28509 GemmMicrokernelTester()
28510 .mr(2)
28511 .nr(4)
28512 .kr(8)
28513 .sr(1)
28514 .m(m)
28515 .n(4)
28516 .k(8)
28517 .iterations(1)
28518 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28519 }
28520 }
28521
28522 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile_n) {
28523 TEST_REQUIRES_X86_AVX;
28524 for (uint32_t n = 1; n <= 4; n++) {
28525 GemmMicrokernelTester()
28526 .mr(2)
28527 .nr(4)
28528 .kr(8)
28529 .sr(1)
28530 .m(2)
28531 .n(n)
28532 .k(8)
28533 .iterations(1)
28534 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28535 }
28536 }
28537
28538 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_lt_8) {
28539 TEST_REQUIRES_X86_AVX;
28540 for (size_t k = 1; k < 8; k++) {
28541 GemmMicrokernelTester()
28542 .mr(2)
28543 .nr(4)
28544 .kr(8)
28545 .sr(1)
28546 .m(2)
28547 .n(4)
28548 .k(k)
28549 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28550 }
28551 }
28552
28553 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_lt_8_subtile) {
28554 TEST_REQUIRES_X86_AVX;
28555 for (size_t k = 1; k < 8; k++) {
28556 for (uint32_t m = 1; m <= 2; m++) {
28557 for (uint32_t n = 1; n <= 4; n++) {
28558 GemmMicrokernelTester()
28559 .mr(2)
28560 .nr(4)
28561 .kr(8)
28562 .sr(1)
28563 .m(m)
28564 .n(n)
28565 .k(k)
28566 .iterations(1)
28567 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28568 }
28569 }
28570 }
28571 }
28572
28573 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_gt_8) {
28574 TEST_REQUIRES_X86_AVX;
28575 for (size_t k = 9; k < 16; k++) {
28576 GemmMicrokernelTester()
28577 .mr(2)
28578 .nr(4)
28579 .kr(8)
28580 .sr(1)
28581 .m(2)
28582 .n(4)
28583 .k(k)
28584 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28585 }
28586 }
28587
28588 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_gt_8_subtile) {
28589 TEST_REQUIRES_X86_AVX;
28590 for (size_t k = 9; k < 16; k++) {
28591 for (uint32_t m = 1; m <= 2; m++) {
28592 for (uint32_t n = 1; n <= 4; n++) {
28593 GemmMicrokernelTester()
28594 .mr(2)
28595 .nr(4)
28596 .kr(8)
28597 .sr(1)
28598 .m(m)
28599 .n(n)
28600 .k(k)
28601 .iterations(1)
28602 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28603 }
28604 }
28605 }
28606 }
28607
28608 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_div_8) {
28609 TEST_REQUIRES_X86_AVX;
28610 for (size_t k = 16; k <= 80; k += 8) {
28611 GemmMicrokernelTester()
28612 .mr(2)
28613 .nr(4)
28614 .kr(8)
28615 .sr(1)
28616 .m(2)
28617 .n(4)
28618 .k(k)
28619 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28620 }
28621 }
28622
28623 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_div_8_subtile) {
28624 TEST_REQUIRES_X86_AVX;
28625 for (size_t k = 16; k <= 80; k += 8) {
28626 for (uint32_t m = 1; m <= 2; m++) {
28627 for (uint32_t n = 1; n <= 4; n++) {
28628 GemmMicrokernelTester()
28629 .mr(2)
28630 .nr(4)
28631 .kr(8)
28632 .sr(1)
28633 .m(m)
28634 .n(n)
28635 .k(k)
28636 .iterations(1)
28637 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28638 }
28639 }
28640 }
28641 }
28642
28643 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4) {
28644 TEST_REQUIRES_X86_AVX;
28645 for (uint32_t n = 5; n < 8; n++) {
28646 for (size_t k = 1; k <= 40; k += 9) {
28647 GemmMicrokernelTester()
28648 .mr(2)
28649 .nr(4)
28650 .kr(8)
28651 .sr(1)
28652 .m(2)
28653 .n(4)
28654 .k(k)
28655 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28656 }
28657 }
28658 }
28659
28660 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_strided_cn) {
28661 TEST_REQUIRES_X86_AVX;
28662 for (uint32_t n = 5; n < 8; n++) {
28663 for (size_t k = 1; k <= 40; k += 9) {
28664 GemmMicrokernelTester()
28665 .mr(2)
28666 .nr(4)
28667 .kr(8)
28668 .sr(1)
28669 .m(2)
28670 .n(4)
28671 .k(k)
28672 .cn_stride(7)
28673 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28674 }
28675 }
28676 }
28677
28678 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_subtile) {
28679 TEST_REQUIRES_X86_AVX;
28680 for (uint32_t n = 5; n < 8; n++) {
28681 for (size_t k = 1; k <= 40; k += 9) {
28682 for (uint32_t m = 1; m <= 2; m++) {
28683 GemmMicrokernelTester()
28684 .mr(2)
28685 .nr(4)
28686 .kr(8)
28687 .sr(1)
28688 .m(m)
28689 .n(n)
28690 .k(k)
28691 .iterations(1)
28692 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28693 }
28694 }
28695 }
28696 }
28697
28698 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4) {
28699 TEST_REQUIRES_X86_AVX;
28700 for (uint32_t n = 8; n <= 12; n += 4) {
28701 for (size_t k = 1; k <= 40; k += 9) {
28702 GemmMicrokernelTester()
28703 .mr(2)
28704 .nr(4)
28705 .kr(8)
28706 .sr(1)
28707 .m(2)
28708 .n(4)
28709 .k(k)
28710 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28711 }
28712 }
28713 }
28714
28715 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_strided_cn) {
28716 TEST_REQUIRES_X86_AVX;
28717 for (uint32_t n = 8; n <= 12; n += 4) {
28718 for (size_t k = 1; k <= 40; k += 9) {
28719 GemmMicrokernelTester()
28720 .mr(2)
28721 .nr(4)
28722 .kr(8)
28723 .sr(1)
28724 .m(2)
28725 .n(n)
28726 .k(k)
28727 .cn_stride(7)
28728 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28729 }
28730 }
28731 }
28732
28733 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_subtile) {
28734 TEST_REQUIRES_X86_AVX;
28735 for (uint32_t n = 8; n <= 12; n += 4) {
28736 for (size_t k = 1; k <= 40; k += 9) {
28737 for (uint32_t m = 1; m <= 2; m++) {
28738 GemmMicrokernelTester()
28739 .mr(2)
28740 .nr(4)
28741 .kr(8)
28742 .sr(1)
28743 .m(m)
28744 .n(n)
28745 .k(k)
28746 .iterations(1)
28747 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28748 }
28749 }
28750 }
28751 }
28752
28753 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, small_kernel) {
28754 TEST_REQUIRES_X86_AVX;
28755 for (size_t k = 1; k <= 40; k += 9) {
28756 GemmMicrokernelTester()
28757 .mr(2)
28758 .nr(4)
28759 .kr(8)
28760 .sr(1)
28761 .m(2)
28762 .n(4)
28763 .k(k)
28764 .ks(3)
28765 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28766 }
28767 }
28768
28769 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, small_kernel_subtile) {
28770 TEST_REQUIRES_X86_AVX;
28771 for (size_t k = 1; k <= 40; k += 9) {
28772 for (uint32_t m = 1; m <= 2; m++) {
28773 for (uint32_t n = 1; n <= 4; n++) {
28774 GemmMicrokernelTester()
28775 .mr(2)
28776 .nr(4)
28777 .kr(8)
28778 .sr(1)
28779 .m(m)
28780 .n(n)
28781 .k(k)
28782 .ks(3)
28783 .iterations(1)
28784 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28785 }
28786 }
28787 }
28788 }
28789
28790 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_small_kernel) {
28791 TEST_REQUIRES_X86_AVX;
28792 for (uint32_t n = 5; n < 8; n++) {
28793 for (size_t k = 1; k <= 40; k += 9) {
28794 GemmMicrokernelTester()
28795 .mr(2)
28796 .nr(4)
28797 .kr(8)
28798 .sr(1)
28799 .m(2)
28800 .n(4)
28801 .k(k)
28802 .ks(3)
28803 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28804 }
28805 }
28806 }
28807
28808 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_small_kernel) {
28809 TEST_REQUIRES_X86_AVX;
28810 for (uint32_t n = 8; n <= 12; n += 4) {
28811 for (size_t k = 1; k <= 40; k += 9) {
28812 GemmMicrokernelTester()
28813 .mr(2)
28814 .nr(4)
28815 .kr(8)
28816 .sr(1)
28817 .m(2)
28818 .n(4)
28819 .k(k)
28820 .ks(3)
28821 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28822 }
28823 }
28824 }
28825
28826 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cm_subtile) {
28827 TEST_REQUIRES_X86_AVX;
28828 for (size_t k = 1; k <= 40; k += 9) {
28829 for (uint32_t m = 1; m <= 2; m++) {
28830 for (uint32_t n = 1; n <= 4; n++) {
28831 GemmMicrokernelTester()
28832 .mr(2)
28833 .nr(4)
28834 .kr(8)
28835 .sr(1)
28836 .m(m)
28837 .n(n)
28838 .k(k)
28839 .cm_stride(7)
28840 .iterations(1)
28841 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28842 }
28843 }
28844 }
28845 }
28846
28847 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, a_offset) {
28848 TEST_REQUIRES_X86_AVX;
28849 for (size_t k = 1; k <= 40; k += 9) {
28850 GemmMicrokernelTester()
28851 .mr(2)
28852 .nr(4)
28853 .kr(8)
28854 .sr(1)
28855 .m(2)
28856 .n(4)
28857 .k(k)
28858 .ks(3)
28859 .a_offset(83)
28860 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28861 }
28862 }
28863
28864 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, zero) {
28865 TEST_REQUIRES_X86_AVX;
28866 for (uint32_t mz = 0; mz < 2; mz++) {
28867 for (size_t k = 1; k <= 40; k += 9) {
28868 GemmMicrokernelTester()
28869 .mr(2)
28870 .nr(4)
28871 .kr(8)
28872 .sr(1)
28873 .m(2)
28874 .n(4)
28875 .k(k)
28876 .ks(3)
28877 .a_offset(83)
28878 .zero_index(mz)
28879 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28880 }
28881 }
28882 }
28883
28884 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, qmin) {
28885 TEST_REQUIRES_X86_AVX;
28886 GemmMicrokernelTester()
28887 .mr(2)
28888 .nr(4)
28889 .kr(8)
28890 .sr(1)
28891 .m(2)
28892 .n(4)
28893 .k(8)
28894 .qmin(128)
28895 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28896 }
28897
28898 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, qmax) {
28899 TEST_REQUIRES_X86_AVX;
28900 GemmMicrokernelTester()
28901 .mr(2)
28902 .nr(4)
28903 .kr(8)
28904 .sr(1)
28905 .m(2)
28906 .n(4)
28907 .k(8)
28908 .qmax(128)
28909 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28910 }
28911
28912 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cm) {
28913 TEST_REQUIRES_X86_AVX;
28914 GemmMicrokernelTester()
28915 .mr(2)
28916 .nr(4)
28917 .kr(8)
28918 .sr(1)
28919 .m(2)
28920 .n(4)
28921 .k(8)
28922 .cm_stride(7)
28923 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28924 }
28925
28926 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, no_a_zero_point) {
28927 TEST_REQUIRES_X86_AVX;
28928 for (size_t k = 1; k <= 40; k += 9) {
28929 GemmMicrokernelTester()
28930 .mr(2)
28931 .nr(4)
28932 .kr(8)
28933 .sr(1)
28934 .m(2)
28935 .n(4)
28936 .k(k)
28937 .a_zero_point(0)
28938 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28939 }
28940 }
28941
28942 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, no_b_zero_point) {
28943 TEST_REQUIRES_X86_AVX;
28944 for (size_t k = 1; k <= 40; k += 9) {
28945 GemmMicrokernelTester()
28946 .mr(2)
28947 .nr(4)
28948 .kr(8)
28949 .sr(1)
28950 .m(2)
28951 .n(4)
28952 .k(k)
28953 .b_zero_point(0)
28954 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28955 }
28956 }
28957
28958 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, no_zero_point) {
28959 TEST_REQUIRES_X86_AVX;
28960 for (size_t k = 1; k <= 40; k += 9) {
28961 GemmMicrokernelTester()
28962 .mr(2)
28963 .nr(4)
28964 .kr(8)
28965 .sr(1)
28966 .m(2)
28967 .n(4)
28968 .k(k)
28969 .a_zero_point(0)
28970 .b_zero_point(0)
28971 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28972 }
28973 }
28974#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
28975
28976
28977#if XNN_ARCH_X86 || XNN_ARCH_X86_64
28978 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8) {
28979 TEST_REQUIRES_X86_AVX;
28980 GemmMicrokernelTester()
28981 .mr(3)
28982 .nr(4)
28983 .kr(8)
28984 .sr(1)
28985 .m(3)
28986 .n(4)
28987 .k(8)
28988 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
28989 }
28990
28991 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, strided_cn) {
28992 TEST_REQUIRES_X86_AVX;
28993 GemmMicrokernelTester()
28994 .mr(3)
28995 .nr(4)
28996 .kr(8)
28997 .sr(1)
28998 .m(3)
28999 .n(4)
29000 .k(8)
29001 .cn_stride(7)
29002 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29003 }
29004
29005 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8_subtile) {
29006 TEST_REQUIRES_X86_AVX;
29007 for (uint32_t m = 1; m <= 3; m++) {
29008 for (uint32_t n = 1; n <= 4; n++) {
29009 GemmMicrokernelTester()
29010 .mr(3)
29011 .nr(4)
29012 .kr(8)
29013 .sr(1)
29014 .m(m)
29015 .n(n)
29016 .k(8)
29017 .iterations(1)
29018 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29019 }
29020 }
29021 }
29022
29023 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8_subtile_m) {
29024 TEST_REQUIRES_X86_AVX;
29025 for (uint32_t m = 1; m <= 3; m++) {
29026 GemmMicrokernelTester()
29027 .mr(3)
29028 .nr(4)
29029 .kr(8)
29030 .sr(1)
29031 .m(m)
29032 .n(4)
29033 .k(8)
29034 .iterations(1)
29035 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29036 }
29037 }
29038
29039 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8_subtile_n) {
29040 TEST_REQUIRES_X86_AVX;
29041 for (uint32_t n = 1; n <= 4; n++) {
29042 GemmMicrokernelTester()
29043 .mr(3)
29044 .nr(4)
29045 .kr(8)
29046 .sr(1)
29047 .m(3)
29048 .n(n)
29049 .k(8)
29050 .iterations(1)
29051 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29052 }
29053 }
29054
29055 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_lt_8) {
29056 TEST_REQUIRES_X86_AVX;
29057 for (size_t k = 1; k < 8; k++) {
29058 GemmMicrokernelTester()
29059 .mr(3)
29060 .nr(4)
29061 .kr(8)
29062 .sr(1)
29063 .m(3)
29064 .n(4)
29065 .k(k)
29066 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29067 }
29068 }
29069
29070 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_lt_8_subtile) {
29071 TEST_REQUIRES_X86_AVX;
29072 for (size_t k = 1; k < 8; k++) {
29073 for (uint32_t m = 1; m <= 3; m++) {
29074 for (uint32_t n = 1; n <= 4; n++) {
29075 GemmMicrokernelTester()
29076 .mr(3)
29077 .nr(4)
29078 .kr(8)
29079 .sr(1)
29080 .m(m)
29081 .n(n)
29082 .k(k)
29083 .iterations(1)
29084 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29085 }
29086 }
29087 }
29088 }
29089
29090 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_gt_8) {
29091 TEST_REQUIRES_X86_AVX;
29092 for (size_t k = 9; k < 16; k++) {
29093 GemmMicrokernelTester()
29094 .mr(3)
29095 .nr(4)
29096 .kr(8)
29097 .sr(1)
29098 .m(3)
29099 .n(4)
29100 .k(k)
29101 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29102 }
29103 }
29104
29105 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_gt_8_subtile) {
29106 TEST_REQUIRES_X86_AVX;
29107 for (size_t k = 9; k < 16; k++) {
29108 for (uint32_t m = 1; m <= 3; m++) {
29109 for (uint32_t n = 1; n <= 4; n++) {
29110 GemmMicrokernelTester()
29111 .mr(3)
29112 .nr(4)
29113 .kr(8)
29114 .sr(1)
29115 .m(m)
29116 .n(n)
29117 .k(k)
29118 .iterations(1)
29119 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29120 }
29121 }
29122 }
29123 }
29124
29125 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_div_8) {
29126 TEST_REQUIRES_X86_AVX;
29127 for (size_t k = 16; k <= 80; k += 8) {
29128 GemmMicrokernelTester()
29129 .mr(3)
29130 .nr(4)
29131 .kr(8)
29132 .sr(1)
29133 .m(3)
29134 .n(4)
29135 .k(k)
29136 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29137 }
29138 }
29139
29140 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_div_8_subtile) {
29141 TEST_REQUIRES_X86_AVX;
29142 for (size_t k = 16; k <= 80; k += 8) {
29143 for (uint32_t m = 1; m <= 3; m++) {
29144 for (uint32_t n = 1; n <= 4; n++) {
29145 GemmMicrokernelTester()
29146 .mr(3)
29147 .nr(4)
29148 .kr(8)
29149 .sr(1)
29150 .m(m)
29151 .n(n)
29152 .k(k)
29153 .iterations(1)
29154 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29155 }
29156 }
29157 }
29158 }
29159
29160 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4) {
29161 TEST_REQUIRES_X86_AVX;
29162 for (uint32_t n = 5; n < 8; n++) {
29163 for (size_t k = 1; k <= 40; k += 9) {
29164 GemmMicrokernelTester()
29165 .mr(3)
29166 .nr(4)
29167 .kr(8)
29168 .sr(1)
29169 .m(3)
29170 .n(4)
29171 .k(k)
29172 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29173 }
29174 }
29175 }
29176
29177 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4_strided_cn) {
29178 TEST_REQUIRES_X86_AVX;
29179 for (uint32_t n = 5; n < 8; n++) {
29180 for (size_t k = 1; k <= 40; k += 9) {
29181 GemmMicrokernelTester()
29182 .mr(3)
29183 .nr(4)
29184 .kr(8)
29185 .sr(1)
29186 .m(3)
29187 .n(4)
29188 .k(k)
29189 .cn_stride(7)
29190 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29191 }
29192 }
29193 }
29194
29195 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4_subtile) {
29196 TEST_REQUIRES_X86_AVX;
29197 for (uint32_t n = 5; n < 8; n++) {
29198 for (size_t k = 1; k <= 40; k += 9) {
29199 for (uint32_t m = 1; m <= 3; m++) {
29200 GemmMicrokernelTester()
29201 .mr(3)
29202 .nr(4)
29203 .kr(8)
29204 .sr(1)
29205 .m(m)
29206 .n(n)
29207 .k(k)
29208 .iterations(1)
29209 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29210 }
29211 }
29212 }
29213 }
29214
29215 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4) {
29216 TEST_REQUIRES_X86_AVX;
29217 for (uint32_t n = 8; n <= 12; n += 4) {
29218 for (size_t k = 1; k <= 40; k += 9) {
29219 GemmMicrokernelTester()
29220 .mr(3)
29221 .nr(4)
29222 .kr(8)
29223 .sr(1)
29224 .m(3)
29225 .n(4)
29226 .k(k)
29227 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29228 }
29229 }
29230 }
29231
29232 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4_strided_cn) {
29233 TEST_REQUIRES_X86_AVX;
29234 for (uint32_t n = 8; n <= 12; n += 4) {
29235 for (size_t k = 1; k <= 40; k += 9) {
29236 GemmMicrokernelTester()
29237 .mr(3)
29238 .nr(4)
29239 .kr(8)
29240 .sr(1)
29241 .m(3)
29242 .n(n)
29243 .k(k)
29244 .cn_stride(7)
29245 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29246 }
29247 }
29248 }
29249
29250 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4_subtile) {
29251 TEST_REQUIRES_X86_AVX;
29252 for (uint32_t n = 8; n <= 12; n += 4) {
29253 for (size_t k = 1; k <= 40; k += 9) {
29254 for (uint32_t m = 1; m <= 3; m++) {
29255 GemmMicrokernelTester()
29256 .mr(3)
29257 .nr(4)
29258 .kr(8)
29259 .sr(1)
29260 .m(m)
29261 .n(n)
29262 .k(k)
29263 .iterations(1)
29264 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29265 }
29266 }
29267 }
29268 }
29269
29270 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, small_kernel) {
29271 TEST_REQUIRES_X86_AVX;
29272 for (size_t k = 1; k <= 40; k += 9) {
29273 GemmMicrokernelTester()
29274 .mr(3)
29275 .nr(4)
29276 .kr(8)
29277 .sr(1)
29278 .m(3)
29279 .n(4)
29280 .k(k)
29281 .ks(3)
29282 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29283 }
29284 }
29285
29286 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, small_kernel_subtile) {
29287 TEST_REQUIRES_X86_AVX;
29288 for (size_t k = 1; k <= 40; k += 9) {
29289 for (uint32_t m = 1; m <= 3; m++) {
29290 for (uint32_t n = 1; n <= 4; n++) {
29291 GemmMicrokernelTester()
29292 .mr(3)
29293 .nr(4)
29294 .kr(8)
29295 .sr(1)
29296 .m(m)
29297 .n(n)
29298 .k(k)
29299 .ks(3)
29300 .iterations(1)
29301 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29302 }
29303 }
29304 }
29305 }
29306
29307 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4_small_kernel) {
29308 TEST_REQUIRES_X86_AVX;
29309 for (uint32_t n = 5; n < 8; n++) {
29310 for (size_t k = 1; k <= 40; k += 9) {
29311 GemmMicrokernelTester()
29312 .mr(3)
29313 .nr(4)
29314 .kr(8)
29315 .sr(1)
29316 .m(3)
29317 .n(4)
29318 .k(k)
29319 .ks(3)
29320 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29321 }
29322 }
29323 }
29324
29325 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4_small_kernel) {
29326 TEST_REQUIRES_X86_AVX;
29327 for (uint32_t n = 8; n <= 12; n += 4) {
29328 for (size_t k = 1; k <= 40; k += 9) {
29329 GemmMicrokernelTester()
29330 .mr(3)
29331 .nr(4)
29332 .kr(8)
29333 .sr(1)
29334 .m(3)
29335 .n(4)
29336 .k(k)
29337 .ks(3)
29338 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29339 }
29340 }
29341 }
29342
29343 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, strided_cm_subtile) {
29344 TEST_REQUIRES_X86_AVX;
29345 for (size_t k = 1; k <= 40; k += 9) {
29346 for (uint32_t m = 1; m <= 3; m++) {
29347 for (uint32_t n = 1; n <= 4; n++) {
29348 GemmMicrokernelTester()
29349 .mr(3)
29350 .nr(4)
29351 .kr(8)
29352 .sr(1)
29353 .m(m)
29354 .n(n)
29355 .k(k)
29356 .cm_stride(7)
29357 .iterations(1)
29358 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29359 }
29360 }
29361 }
29362 }
29363
29364 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, a_offset) {
29365 TEST_REQUIRES_X86_AVX;
29366 for (size_t k = 1; k <= 40; k += 9) {
29367 GemmMicrokernelTester()
29368 .mr(3)
29369 .nr(4)
29370 .kr(8)
29371 .sr(1)
29372 .m(3)
29373 .n(4)
29374 .k(k)
29375 .ks(3)
29376 .a_offset(127)
29377 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29378 }
29379 }
29380
29381 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, zero) {
29382 TEST_REQUIRES_X86_AVX;
29383 for (uint32_t mz = 0; mz < 3; mz++) {
29384 for (size_t k = 1; k <= 40; k += 9) {
29385 GemmMicrokernelTester()
29386 .mr(3)
29387 .nr(4)
29388 .kr(8)
29389 .sr(1)
29390 .m(3)
29391 .n(4)
29392 .k(k)
29393 .ks(3)
29394 .a_offset(127)
29395 .zero_index(mz)
29396 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29397 }
29398 }
29399 }
29400
29401 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, qmin) {
29402 TEST_REQUIRES_X86_AVX;
29403 GemmMicrokernelTester()
29404 .mr(3)
29405 .nr(4)
29406 .kr(8)
29407 .sr(1)
29408 .m(3)
29409 .n(4)
29410 .k(8)
29411 .qmin(128)
29412 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29413 }
29414
29415 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, qmax) {
29416 TEST_REQUIRES_X86_AVX;
29417 GemmMicrokernelTester()
29418 .mr(3)
29419 .nr(4)
29420 .kr(8)
29421 .sr(1)
29422 .m(3)
29423 .n(4)
29424 .k(8)
29425 .qmax(128)
29426 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29427 }
29428
29429 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, strided_cm) {
29430 TEST_REQUIRES_X86_AVX;
29431 GemmMicrokernelTester()
29432 .mr(3)
29433 .nr(4)
29434 .kr(8)
29435 .sr(1)
29436 .m(3)
29437 .n(4)
29438 .k(8)
29439 .cm_stride(7)
29440 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29441 }
29442
29443 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, no_a_zero_point) {
29444 TEST_REQUIRES_X86_AVX;
29445 for (size_t k = 1; k <= 40; k += 9) {
29446 GemmMicrokernelTester()
29447 .mr(3)
29448 .nr(4)
29449 .kr(8)
29450 .sr(1)
29451 .m(3)
29452 .n(4)
29453 .k(k)
29454 .a_zero_point(0)
29455 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29456 }
29457 }
29458
29459 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, no_b_zero_point) {
29460 TEST_REQUIRES_X86_AVX;
29461 for (size_t k = 1; k <= 40; k += 9) {
29462 GemmMicrokernelTester()
29463 .mr(3)
29464 .nr(4)
29465 .kr(8)
29466 .sr(1)
29467 .m(3)
29468 .n(4)
29469 .k(k)
29470 .b_zero_point(0)
29471 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29472 }
29473 }
29474
29475 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, no_zero_point) {
29476 TEST_REQUIRES_X86_AVX;
29477 for (size_t k = 1; k <= 40; k += 9) {
29478 GemmMicrokernelTester()
29479 .mr(3)
29480 .nr(4)
29481 .kr(8)
29482 .sr(1)
29483 .m(3)
29484 .n(4)
29485 .k(k)
29486 .a_zero_point(0)
29487 .b_zero_point(0)
29488 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29489 }
29490 }
29491#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
29492
29493
29494#if XNN_ARCH_X86 || XNN_ARCH_X86_64
29495 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8) {
29496 TEST_REQUIRES_X86_XOP;
29497 GemmMicrokernelTester()
29498 .mr(1)
29499 .nr(4)
29500 .kr(8)
29501 .sr(1)
29502 .m(1)
29503 .n(4)
29504 .k(8)
29505 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29506 }
29507
29508 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, strided_cn) {
29509 TEST_REQUIRES_X86_XOP;
29510 GemmMicrokernelTester()
29511 .mr(1)
29512 .nr(4)
29513 .kr(8)
29514 .sr(1)
29515 .m(1)
29516 .n(4)
29517 .k(8)
29518 .cn_stride(7)
29519 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29520 }
29521
29522 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8_subtile) {
29523 TEST_REQUIRES_X86_XOP;
29524 for (uint32_t m = 1; m <= 1; m++) {
29525 for (uint32_t n = 1; n <= 4; n++) {
29526 GemmMicrokernelTester()
29527 .mr(1)
29528 .nr(4)
29529 .kr(8)
29530 .sr(1)
29531 .m(m)
29532 .n(n)
29533 .k(8)
29534 .iterations(1)
29535 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29536 }
29537 }
29538 }
29539
29540 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8_subtile_m) {
29541 TEST_REQUIRES_X86_XOP;
29542 for (uint32_t m = 1; m <= 1; m++) {
29543 GemmMicrokernelTester()
29544 .mr(1)
29545 .nr(4)
29546 .kr(8)
29547 .sr(1)
29548 .m(m)
29549 .n(4)
29550 .k(8)
29551 .iterations(1)
29552 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29553 }
29554 }
29555
29556 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8_subtile_n) {
29557 TEST_REQUIRES_X86_XOP;
29558 for (uint32_t n = 1; n <= 4; n++) {
29559 GemmMicrokernelTester()
29560 .mr(1)
29561 .nr(4)
29562 .kr(8)
29563 .sr(1)
29564 .m(1)
29565 .n(n)
29566 .k(8)
29567 .iterations(1)
29568 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29569 }
29570 }
29571
29572 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_lt_8) {
29573 TEST_REQUIRES_X86_XOP;
29574 for (size_t k = 1; k < 8; k++) {
29575 GemmMicrokernelTester()
29576 .mr(1)
29577 .nr(4)
29578 .kr(8)
29579 .sr(1)
29580 .m(1)
29581 .n(4)
29582 .k(k)
29583 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29584 }
29585 }
29586
29587 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_lt_8_subtile) {
29588 TEST_REQUIRES_X86_XOP;
29589 for (size_t k = 1; k < 8; k++) {
29590 for (uint32_t m = 1; m <= 1; m++) {
29591 for (uint32_t n = 1; n <= 4; n++) {
29592 GemmMicrokernelTester()
29593 .mr(1)
29594 .nr(4)
29595 .kr(8)
29596 .sr(1)
29597 .m(m)
29598 .n(n)
29599 .k(k)
29600 .iterations(1)
29601 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29602 }
29603 }
29604 }
29605 }
29606
29607 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_gt_8) {
29608 TEST_REQUIRES_X86_XOP;
29609 for (size_t k = 9; k < 16; k++) {
29610 GemmMicrokernelTester()
29611 .mr(1)
29612 .nr(4)
29613 .kr(8)
29614 .sr(1)
29615 .m(1)
29616 .n(4)
29617 .k(k)
29618 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29619 }
29620 }
29621
29622 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_gt_8_subtile) {
29623 TEST_REQUIRES_X86_XOP;
29624 for (size_t k = 9; k < 16; k++) {
29625 for (uint32_t m = 1; m <= 1; m++) {
29626 for (uint32_t n = 1; n <= 4; n++) {
29627 GemmMicrokernelTester()
29628 .mr(1)
29629 .nr(4)
29630 .kr(8)
29631 .sr(1)
29632 .m(m)
29633 .n(n)
29634 .k(k)
29635 .iterations(1)
29636 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29637 }
29638 }
29639 }
29640 }
29641
29642 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_div_8) {
29643 TEST_REQUIRES_X86_XOP;
29644 for (size_t k = 16; k <= 80; k += 8) {
29645 GemmMicrokernelTester()
29646 .mr(1)
29647 .nr(4)
29648 .kr(8)
29649 .sr(1)
29650 .m(1)
29651 .n(4)
29652 .k(k)
29653 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29654 }
29655 }
29656
29657 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_div_8_subtile) {
29658 TEST_REQUIRES_X86_XOP;
29659 for (size_t k = 16; k <= 80; k += 8) {
29660 for (uint32_t m = 1; m <= 1; m++) {
29661 for (uint32_t n = 1; n <= 4; n++) {
29662 GemmMicrokernelTester()
29663 .mr(1)
29664 .nr(4)
29665 .kr(8)
29666 .sr(1)
29667 .m(m)
29668 .n(n)
29669 .k(k)
29670 .iterations(1)
29671 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29672 }
29673 }
29674 }
29675 }
29676
29677 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4) {
29678 TEST_REQUIRES_X86_XOP;
29679 for (uint32_t n = 5; n < 8; n++) {
29680 for (size_t k = 1; k <= 40; k += 9) {
29681 GemmMicrokernelTester()
29682 .mr(1)
29683 .nr(4)
29684 .kr(8)
29685 .sr(1)
29686 .m(1)
29687 .n(4)
29688 .k(k)
29689 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29690 }
29691 }
29692 }
29693
29694 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4_strided_cn) {
29695 TEST_REQUIRES_X86_XOP;
29696 for (uint32_t n = 5; n < 8; n++) {
29697 for (size_t k = 1; k <= 40; k += 9) {
29698 GemmMicrokernelTester()
29699 .mr(1)
29700 .nr(4)
29701 .kr(8)
29702 .sr(1)
29703 .m(1)
29704 .n(4)
29705 .k(k)
29706 .cn_stride(7)
29707 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29708 }
29709 }
29710 }
29711
29712 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4_subtile) {
29713 TEST_REQUIRES_X86_XOP;
29714 for (uint32_t n = 5; n < 8; n++) {
29715 for (size_t k = 1; k <= 40; k += 9) {
29716 for (uint32_t m = 1; m <= 1; m++) {
29717 GemmMicrokernelTester()
29718 .mr(1)
29719 .nr(4)
29720 .kr(8)
29721 .sr(1)
29722 .m(m)
29723 .n(n)
29724 .k(k)
29725 .iterations(1)
29726 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29727 }
29728 }
29729 }
29730 }
29731
29732 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4) {
29733 TEST_REQUIRES_X86_XOP;
29734 for (uint32_t n = 8; n <= 12; n += 4) {
29735 for (size_t k = 1; k <= 40; k += 9) {
29736 GemmMicrokernelTester()
29737 .mr(1)
29738 .nr(4)
29739 .kr(8)
29740 .sr(1)
29741 .m(1)
29742 .n(4)
29743 .k(k)
29744 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29745 }
29746 }
29747 }
29748
29749 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4_strided_cn) {
29750 TEST_REQUIRES_X86_XOP;
29751 for (uint32_t n = 8; n <= 12; n += 4) {
29752 for (size_t k = 1; k <= 40; k += 9) {
29753 GemmMicrokernelTester()
29754 .mr(1)
29755 .nr(4)
29756 .kr(8)
29757 .sr(1)
29758 .m(1)
29759 .n(n)
29760 .k(k)
29761 .cn_stride(7)
29762 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29763 }
29764 }
29765 }
29766
29767 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4_subtile) {
29768 TEST_REQUIRES_X86_XOP;
29769 for (uint32_t n = 8; n <= 12; n += 4) {
29770 for (size_t k = 1; k <= 40; k += 9) {
29771 for (uint32_t m = 1; m <= 1; m++) {
29772 GemmMicrokernelTester()
29773 .mr(1)
29774 .nr(4)
29775 .kr(8)
29776 .sr(1)
29777 .m(m)
29778 .n(n)
29779 .k(k)
29780 .iterations(1)
29781 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29782 }
29783 }
29784 }
29785 }
29786
29787 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, small_kernel) {
29788 TEST_REQUIRES_X86_XOP;
29789 for (size_t k = 1; k <= 40; k += 9) {
29790 GemmMicrokernelTester()
29791 .mr(1)
29792 .nr(4)
29793 .kr(8)
29794 .sr(1)
29795 .m(1)
29796 .n(4)
29797 .k(k)
29798 .ks(3)
29799 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29800 }
29801 }
29802
29803 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, small_kernel_subtile) {
29804 TEST_REQUIRES_X86_XOP;
29805 for (size_t k = 1; k <= 40; k += 9) {
29806 for (uint32_t m = 1; m <= 1; m++) {
29807 for (uint32_t n = 1; n <= 4; n++) {
29808 GemmMicrokernelTester()
29809 .mr(1)
29810 .nr(4)
29811 .kr(8)
29812 .sr(1)
29813 .m(m)
29814 .n(n)
29815 .k(k)
29816 .ks(3)
29817 .iterations(1)
29818 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29819 }
29820 }
29821 }
29822 }
29823
29824 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4_small_kernel) {
29825 TEST_REQUIRES_X86_XOP;
29826 for (uint32_t n = 5; n < 8; n++) {
29827 for (size_t k = 1; k <= 40; k += 9) {
29828 GemmMicrokernelTester()
29829 .mr(1)
29830 .nr(4)
29831 .kr(8)
29832 .sr(1)
29833 .m(1)
29834 .n(4)
29835 .k(k)
29836 .ks(3)
29837 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29838 }
29839 }
29840 }
29841
29842 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4_small_kernel) {
29843 TEST_REQUIRES_X86_XOP;
29844 for (uint32_t n = 8; n <= 12; n += 4) {
29845 for (size_t k = 1; k <= 40; k += 9) {
29846 GemmMicrokernelTester()
29847 .mr(1)
29848 .nr(4)
29849 .kr(8)
29850 .sr(1)
29851 .m(1)
29852 .n(4)
29853 .k(k)
29854 .ks(3)
29855 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29856 }
29857 }
29858 }
29859
29860 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, strided_cm_subtile) {
29861 TEST_REQUIRES_X86_XOP;
29862 for (size_t k = 1; k <= 40; k += 9) {
29863 for (uint32_t m = 1; m <= 1; m++) {
29864 for (uint32_t n = 1; n <= 4; n++) {
29865 GemmMicrokernelTester()
29866 .mr(1)
29867 .nr(4)
29868 .kr(8)
29869 .sr(1)
29870 .m(m)
29871 .n(n)
29872 .k(k)
29873 .cm_stride(7)
29874 .iterations(1)
29875 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29876 }
29877 }
29878 }
29879 }
29880
29881 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, a_offset) {
29882 TEST_REQUIRES_X86_XOP;
29883 for (size_t k = 1; k <= 40; k += 9) {
29884 GemmMicrokernelTester()
29885 .mr(1)
29886 .nr(4)
29887 .kr(8)
29888 .sr(1)
29889 .m(1)
29890 .n(4)
29891 .k(k)
29892 .ks(3)
29893 .a_offset(43)
29894 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29895 }
29896 }
29897
29898 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, zero) {
29899 TEST_REQUIRES_X86_XOP;
29900 for (uint32_t mz = 0; mz < 1; mz++) {
29901 for (size_t k = 1; k <= 40; k += 9) {
29902 GemmMicrokernelTester()
29903 .mr(1)
29904 .nr(4)
29905 .kr(8)
29906 .sr(1)
29907 .m(1)
29908 .n(4)
29909 .k(k)
29910 .ks(3)
29911 .a_offset(43)
29912 .zero_index(mz)
29913 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29914 }
29915 }
29916 }
29917
29918 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, qmin) {
29919 TEST_REQUIRES_X86_XOP;
29920 GemmMicrokernelTester()
29921 .mr(1)
29922 .nr(4)
29923 .kr(8)
29924 .sr(1)
29925 .m(1)
29926 .n(4)
29927 .k(8)
29928 .qmin(128)
29929 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29930 }
29931
29932 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, qmax) {
29933 TEST_REQUIRES_X86_XOP;
29934 GemmMicrokernelTester()
29935 .mr(1)
29936 .nr(4)
29937 .kr(8)
29938 .sr(1)
29939 .m(1)
29940 .n(4)
29941 .k(8)
29942 .qmax(128)
29943 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29944 }
29945
29946 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, strided_cm) {
29947 TEST_REQUIRES_X86_XOP;
29948 GemmMicrokernelTester()
29949 .mr(1)
29950 .nr(4)
29951 .kr(8)
29952 .sr(1)
29953 .m(1)
29954 .n(4)
29955 .k(8)
29956 .cm_stride(7)
29957 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29958 }
29959
29960 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, no_a_zero_point) {
29961 TEST_REQUIRES_X86_XOP;
29962 for (size_t k = 1; k <= 40; k += 9) {
29963 GemmMicrokernelTester()
29964 .mr(1)
29965 .nr(4)
29966 .kr(8)
29967 .sr(1)
29968 .m(1)
29969 .n(4)
29970 .k(k)
29971 .a_zero_point(0)
29972 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29973 }
29974 }
29975
29976 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, no_b_zero_point) {
29977 TEST_REQUIRES_X86_XOP;
29978 for (size_t k = 1; k <= 40; k += 9) {
29979 GemmMicrokernelTester()
29980 .mr(1)
29981 .nr(4)
29982 .kr(8)
29983 .sr(1)
29984 .m(1)
29985 .n(4)
29986 .k(k)
29987 .b_zero_point(0)
29988 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
29989 }
29990 }
29991
29992 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, no_zero_point) {
29993 TEST_REQUIRES_X86_XOP;
29994 for (size_t k = 1; k <= 40; k += 9) {
29995 GemmMicrokernelTester()
29996 .mr(1)
29997 .nr(4)
29998 .kr(8)
29999 .sr(1)
30000 .m(1)
30001 .n(4)
30002 .k(k)
30003 .a_zero_point(0)
30004 .b_zero_point(0)
30005 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30006 }
30007 }
30008#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
30009
30010
30011#if XNN_ARCH_X86 || XNN_ARCH_X86_64
30012 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8) {
30013 TEST_REQUIRES_X86_XOP;
30014 GemmMicrokernelTester()
30015 .mr(2)
30016 .nr(4)
30017 .kr(8)
30018 .sr(1)
30019 .m(2)
30020 .n(4)
30021 .k(8)
30022 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30023 }
30024
30025 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, strided_cn) {
30026 TEST_REQUIRES_X86_XOP;
30027 GemmMicrokernelTester()
30028 .mr(2)
30029 .nr(4)
30030 .kr(8)
30031 .sr(1)
30032 .m(2)
30033 .n(4)
30034 .k(8)
30035 .cn_stride(7)
30036 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30037 }
30038
30039 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8_subtile) {
30040 TEST_REQUIRES_X86_XOP;
30041 for (uint32_t m = 1; m <= 2; m++) {
30042 for (uint32_t n = 1; n <= 4; n++) {
30043 GemmMicrokernelTester()
30044 .mr(2)
30045 .nr(4)
30046 .kr(8)
30047 .sr(1)
30048 .m(m)
30049 .n(n)
30050 .k(8)
30051 .iterations(1)
30052 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30053 }
30054 }
30055 }
30056
30057 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8_subtile_m) {
30058 TEST_REQUIRES_X86_XOP;
30059 for (uint32_t m = 1; m <= 2; m++) {
30060 GemmMicrokernelTester()
30061 .mr(2)
30062 .nr(4)
30063 .kr(8)
30064 .sr(1)
30065 .m(m)
30066 .n(4)
30067 .k(8)
30068 .iterations(1)
30069 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30070 }
30071 }
30072
30073 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8_subtile_n) {
30074 TEST_REQUIRES_X86_XOP;
30075 for (uint32_t n = 1; n <= 4; n++) {
30076 GemmMicrokernelTester()
30077 .mr(2)
30078 .nr(4)
30079 .kr(8)
30080 .sr(1)
30081 .m(2)
30082 .n(n)
30083 .k(8)
30084 .iterations(1)
30085 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30086 }
30087 }
30088
30089 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_lt_8) {
30090 TEST_REQUIRES_X86_XOP;
30091 for (size_t k = 1; k < 8; k++) {
30092 GemmMicrokernelTester()
30093 .mr(2)
30094 .nr(4)
30095 .kr(8)
30096 .sr(1)
30097 .m(2)
30098 .n(4)
30099 .k(k)
30100 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30101 }
30102 }
30103
30104 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_lt_8_subtile) {
30105 TEST_REQUIRES_X86_XOP;
30106 for (size_t k = 1; k < 8; k++) {
30107 for (uint32_t m = 1; m <= 2; m++) {
30108 for (uint32_t n = 1; n <= 4; n++) {
30109 GemmMicrokernelTester()
30110 .mr(2)
30111 .nr(4)
30112 .kr(8)
30113 .sr(1)
30114 .m(m)
30115 .n(n)
30116 .k(k)
30117 .iterations(1)
30118 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30119 }
30120 }
30121 }
30122 }
30123
30124 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_gt_8) {
30125 TEST_REQUIRES_X86_XOP;
30126 for (size_t k = 9; k < 16; k++) {
30127 GemmMicrokernelTester()
30128 .mr(2)
30129 .nr(4)
30130 .kr(8)
30131 .sr(1)
30132 .m(2)
30133 .n(4)
30134 .k(k)
30135 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30136 }
30137 }
30138
30139 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_gt_8_subtile) {
30140 TEST_REQUIRES_X86_XOP;
30141 for (size_t k = 9; k < 16; k++) {
30142 for (uint32_t m = 1; m <= 2; m++) {
30143 for (uint32_t n = 1; n <= 4; n++) {
30144 GemmMicrokernelTester()
30145 .mr(2)
30146 .nr(4)
30147 .kr(8)
30148 .sr(1)
30149 .m(m)
30150 .n(n)
30151 .k(k)
30152 .iterations(1)
30153 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30154 }
30155 }
30156 }
30157 }
30158
30159 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_div_8) {
30160 TEST_REQUIRES_X86_XOP;
30161 for (size_t k = 16; k <= 80; k += 8) {
30162 GemmMicrokernelTester()
30163 .mr(2)
30164 .nr(4)
30165 .kr(8)
30166 .sr(1)
30167 .m(2)
30168 .n(4)
30169 .k(k)
30170 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30171 }
30172 }
30173
30174 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_div_8_subtile) {
30175 TEST_REQUIRES_X86_XOP;
30176 for (size_t k = 16; k <= 80; k += 8) {
30177 for (uint32_t m = 1; m <= 2; m++) {
30178 for (uint32_t n = 1; n <= 4; n++) {
30179 GemmMicrokernelTester()
30180 .mr(2)
30181 .nr(4)
30182 .kr(8)
30183 .sr(1)
30184 .m(m)
30185 .n(n)
30186 .k(k)
30187 .iterations(1)
30188 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30189 }
30190 }
30191 }
30192 }
30193
30194 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4) {
30195 TEST_REQUIRES_X86_XOP;
30196 for (uint32_t n = 5; n < 8; n++) {
30197 for (size_t k = 1; k <= 40; k += 9) {
30198 GemmMicrokernelTester()
30199 .mr(2)
30200 .nr(4)
30201 .kr(8)
30202 .sr(1)
30203 .m(2)
30204 .n(4)
30205 .k(k)
30206 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30207 }
30208 }
30209 }
30210
30211 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4_strided_cn) {
30212 TEST_REQUIRES_X86_XOP;
30213 for (uint32_t n = 5; n < 8; n++) {
30214 for (size_t k = 1; k <= 40; k += 9) {
30215 GemmMicrokernelTester()
30216 .mr(2)
30217 .nr(4)
30218 .kr(8)
30219 .sr(1)
30220 .m(2)
30221 .n(4)
30222 .k(k)
30223 .cn_stride(7)
30224 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30225 }
30226 }
30227 }
30228
30229 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4_subtile) {
30230 TEST_REQUIRES_X86_XOP;
30231 for (uint32_t n = 5; n < 8; n++) {
30232 for (size_t k = 1; k <= 40; k += 9) {
30233 for (uint32_t m = 1; m <= 2; m++) {
30234 GemmMicrokernelTester()
30235 .mr(2)
30236 .nr(4)
30237 .kr(8)
30238 .sr(1)
30239 .m(m)
30240 .n(n)
30241 .k(k)
30242 .iterations(1)
30243 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30244 }
30245 }
30246 }
30247 }
30248
30249 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4) {
30250 TEST_REQUIRES_X86_XOP;
30251 for (uint32_t n = 8; n <= 12; n += 4) {
30252 for (size_t k = 1; k <= 40; k += 9) {
30253 GemmMicrokernelTester()
30254 .mr(2)
30255 .nr(4)
30256 .kr(8)
30257 .sr(1)
30258 .m(2)
30259 .n(4)
30260 .k(k)
30261 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30262 }
30263 }
30264 }
30265
30266 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4_strided_cn) {
30267 TEST_REQUIRES_X86_XOP;
30268 for (uint32_t n = 8; n <= 12; n += 4) {
30269 for (size_t k = 1; k <= 40; k += 9) {
30270 GemmMicrokernelTester()
30271 .mr(2)
30272 .nr(4)
30273 .kr(8)
30274 .sr(1)
30275 .m(2)
30276 .n(n)
30277 .k(k)
30278 .cn_stride(7)
30279 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30280 }
30281 }
30282 }
30283
30284 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4_subtile) {
30285 TEST_REQUIRES_X86_XOP;
30286 for (uint32_t n = 8; n <= 12; n += 4) {
30287 for (size_t k = 1; k <= 40; k += 9) {
30288 for (uint32_t m = 1; m <= 2; m++) {
30289 GemmMicrokernelTester()
30290 .mr(2)
30291 .nr(4)
30292 .kr(8)
30293 .sr(1)
30294 .m(m)
30295 .n(n)
30296 .k(k)
30297 .iterations(1)
30298 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30299 }
30300 }
30301 }
30302 }
30303
30304 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, small_kernel) {
30305 TEST_REQUIRES_X86_XOP;
30306 for (size_t k = 1; k <= 40; k += 9) {
30307 GemmMicrokernelTester()
30308 .mr(2)
30309 .nr(4)
30310 .kr(8)
30311 .sr(1)
30312 .m(2)
30313 .n(4)
30314 .k(k)
30315 .ks(3)
30316 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30317 }
30318 }
30319
30320 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, small_kernel_subtile) {
30321 TEST_REQUIRES_X86_XOP;
30322 for (size_t k = 1; k <= 40; k += 9) {
30323 for (uint32_t m = 1; m <= 2; m++) {
30324 for (uint32_t n = 1; n <= 4; n++) {
30325 GemmMicrokernelTester()
30326 .mr(2)
30327 .nr(4)
30328 .kr(8)
30329 .sr(1)
30330 .m(m)
30331 .n(n)
30332 .k(k)
30333 .ks(3)
30334 .iterations(1)
30335 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30336 }
30337 }
30338 }
30339 }
30340
30341 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4_small_kernel) {
30342 TEST_REQUIRES_X86_XOP;
30343 for (uint32_t n = 5; n < 8; n++) {
30344 for (size_t k = 1; k <= 40; k += 9) {
30345 GemmMicrokernelTester()
30346 .mr(2)
30347 .nr(4)
30348 .kr(8)
30349 .sr(1)
30350 .m(2)
30351 .n(4)
30352 .k(k)
30353 .ks(3)
30354 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30355 }
30356 }
30357 }
30358
30359 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4_small_kernel) {
30360 TEST_REQUIRES_X86_XOP;
30361 for (uint32_t n = 8; n <= 12; n += 4) {
30362 for (size_t k = 1; k <= 40; k += 9) {
30363 GemmMicrokernelTester()
30364 .mr(2)
30365 .nr(4)
30366 .kr(8)
30367 .sr(1)
30368 .m(2)
30369 .n(4)
30370 .k(k)
30371 .ks(3)
30372 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30373 }
30374 }
30375 }
30376
30377 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, strided_cm_subtile) {
30378 TEST_REQUIRES_X86_XOP;
30379 for (size_t k = 1; k <= 40; k += 9) {
30380 for (uint32_t m = 1; m <= 2; m++) {
30381 for (uint32_t n = 1; n <= 4; n++) {
30382 GemmMicrokernelTester()
30383 .mr(2)
30384 .nr(4)
30385 .kr(8)
30386 .sr(1)
30387 .m(m)
30388 .n(n)
30389 .k(k)
30390 .cm_stride(7)
30391 .iterations(1)
30392 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30393 }
30394 }
30395 }
30396 }
30397
30398 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, a_offset) {
30399 TEST_REQUIRES_X86_XOP;
30400 for (size_t k = 1; k <= 40; k += 9) {
30401 GemmMicrokernelTester()
30402 .mr(2)
30403 .nr(4)
30404 .kr(8)
30405 .sr(1)
30406 .m(2)
30407 .n(4)
30408 .k(k)
30409 .ks(3)
30410 .a_offset(83)
30411 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30412 }
30413 }
30414
30415 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, zero) {
30416 TEST_REQUIRES_X86_XOP;
30417 for (uint32_t mz = 0; mz < 2; mz++) {
30418 for (size_t k = 1; k <= 40; k += 9) {
30419 GemmMicrokernelTester()
30420 .mr(2)
30421 .nr(4)
30422 .kr(8)
30423 .sr(1)
30424 .m(2)
30425 .n(4)
30426 .k(k)
30427 .ks(3)
30428 .a_offset(83)
30429 .zero_index(mz)
30430 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30431 }
30432 }
30433 }
30434
30435 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, qmin) {
30436 TEST_REQUIRES_X86_XOP;
30437 GemmMicrokernelTester()
30438 .mr(2)
30439 .nr(4)
30440 .kr(8)
30441 .sr(1)
30442 .m(2)
30443 .n(4)
30444 .k(8)
30445 .qmin(128)
30446 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30447 }
30448
30449 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, qmax) {
30450 TEST_REQUIRES_X86_XOP;
30451 GemmMicrokernelTester()
30452 .mr(2)
30453 .nr(4)
30454 .kr(8)
30455 .sr(1)
30456 .m(2)
30457 .n(4)
30458 .k(8)
30459 .qmax(128)
30460 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30461 }
30462
30463 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, strided_cm) {
30464 TEST_REQUIRES_X86_XOP;
30465 GemmMicrokernelTester()
30466 .mr(2)
30467 .nr(4)
30468 .kr(8)
30469 .sr(1)
30470 .m(2)
30471 .n(4)
30472 .k(8)
30473 .cm_stride(7)
30474 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30475 }
30476
30477 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, no_a_zero_point) {
30478 TEST_REQUIRES_X86_XOP;
30479 for (size_t k = 1; k <= 40; k += 9) {
30480 GemmMicrokernelTester()
30481 .mr(2)
30482 .nr(4)
30483 .kr(8)
30484 .sr(1)
30485 .m(2)
30486 .n(4)
30487 .k(k)
30488 .a_zero_point(0)
30489 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30490 }
30491 }
30492
30493 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, no_b_zero_point) {
30494 TEST_REQUIRES_X86_XOP;
30495 for (size_t k = 1; k <= 40; k += 9) {
30496 GemmMicrokernelTester()
30497 .mr(2)
30498 .nr(4)
30499 .kr(8)
30500 .sr(1)
30501 .m(2)
30502 .n(4)
30503 .k(k)
30504 .b_zero_point(0)
30505 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30506 }
30507 }
30508
30509 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, no_zero_point) {
30510 TEST_REQUIRES_X86_XOP;
30511 for (size_t k = 1; k <= 40; k += 9) {
30512 GemmMicrokernelTester()
30513 .mr(2)
30514 .nr(4)
30515 .kr(8)
30516 .sr(1)
30517 .m(2)
30518 .n(4)
30519 .k(k)
30520 .a_zero_point(0)
30521 .b_zero_point(0)
30522 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30523 }
30524 }
30525#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
30526
30527
30528#if XNN_ARCH_X86 || XNN_ARCH_X86_64
30529 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8) {
30530 TEST_REQUIRES_X86_XOP;
30531 GemmMicrokernelTester()
30532 .mr(3)
30533 .nr(4)
30534 .kr(8)
30535 .sr(1)
30536 .m(3)
30537 .n(4)
30538 .k(8)
30539 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30540 }
30541
30542 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cn) {
30543 TEST_REQUIRES_X86_XOP;
30544 GemmMicrokernelTester()
30545 .mr(3)
30546 .nr(4)
30547 .kr(8)
30548 .sr(1)
30549 .m(3)
30550 .n(4)
30551 .k(8)
30552 .cn_stride(7)
30553 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30554 }
30555
30556 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile) {
30557 TEST_REQUIRES_X86_XOP;
30558 for (uint32_t m = 1; m <= 3; m++) {
30559 for (uint32_t n = 1; n <= 4; n++) {
30560 GemmMicrokernelTester()
30561 .mr(3)
30562 .nr(4)
30563 .kr(8)
30564 .sr(1)
30565 .m(m)
30566 .n(n)
30567 .k(8)
30568 .iterations(1)
30569 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30570 }
30571 }
30572 }
30573
30574 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile_m) {
30575 TEST_REQUIRES_X86_XOP;
30576 for (uint32_t m = 1; m <= 3; m++) {
30577 GemmMicrokernelTester()
30578 .mr(3)
30579 .nr(4)
30580 .kr(8)
30581 .sr(1)
30582 .m(m)
30583 .n(4)
30584 .k(8)
30585 .iterations(1)
30586 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30587 }
30588 }
30589
30590 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile_n) {
30591 TEST_REQUIRES_X86_XOP;
30592 for (uint32_t n = 1; n <= 4; n++) {
30593 GemmMicrokernelTester()
30594 .mr(3)
30595 .nr(4)
30596 .kr(8)
30597 .sr(1)
30598 .m(3)
30599 .n(n)
30600 .k(8)
30601 .iterations(1)
30602 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30603 }
30604 }
30605
30606 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_lt_8) {
30607 TEST_REQUIRES_X86_XOP;
30608 for (size_t k = 1; k < 8; k++) {
30609 GemmMicrokernelTester()
30610 .mr(3)
30611 .nr(4)
30612 .kr(8)
30613 .sr(1)
30614 .m(3)
30615 .n(4)
30616 .k(k)
30617 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30618 }
30619 }
30620
30621 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_lt_8_subtile) {
30622 TEST_REQUIRES_X86_XOP;
30623 for (size_t k = 1; k < 8; k++) {
30624 for (uint32_t m = 1; m <= 3; m++) {
30625 for (uint32_t n = 1; n <= 4; n++) {
30626 GemmMicrokernelTester()
30627 .mr(3)
30628 .nr(4)
30629 .kr(8)
30630 .sr(1)
30631 .m(m)
30632 .n(n)
30633 .k(k)
30634 .iterations(1)
30635 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30636 }
30637 }
30638 }
30639 }
30640
30641 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_gt_8) {
30642 TEST_REQUIRES_X86_XOP;
30643 for (size_t k = 9; k < 16; k++) {
30644 GemmMicrokernelTester()
30645 .mr(3)
30646 .nr(4)
30647 .kr(8)
30648 .sr(1)
30649 .m(3)
30650 .n(4)
30651 .k(k)
30652 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30653 }
30654 }
30655
30656 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_gt_8_subtile) {
30657 TEST_REQUIRES_X86_XOP;
30658 for (size_t k = 9; k < 16; k++) {
30659 for (uint32_t m = 1; m <= 3; m++) {
30660 for (uint32_t n = 1; n <= 4; n++) {
30661 GemmMicrokernelTester()
30662 .mr(3)
30663 .nr(4)
30664 .kr(8)
30665 .sr(1)
30666 .m(m)
30667 .n(n)
30668 .k(k)
30669 .iterations(1)
30670 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30671 }
30672 }
30673 }
30674 }
30675
30676 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_div_8) {
30677 TEST_REQUIRES_X86_XOP;
30678 for (size_t k = 16; k <= 80; k += 8) {
30679 GemmMicrokernelTester()
30680 .mr(3)
30681 .nr(4)
30682 .kr(8)
30683 .sr(1)
30684 .m(3)
30685 .n(4)
30686 .k(k)
30687 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30688 }
30689 }
30690
30691 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_div_8_subtile) {
30692 TEST_REQUIRES_X86_XOP;
30693 for (size_t k = 16; k <= 80; k += 8) {
30694 for (uint32_t m = 1; m <= 3; m++) {
30695 for (uint32_t n = 1; n <= 4; n++) {
30696 GemmMicrokernelTester()
30697 .mr(3)
30698 .nr(4)
30699 .kr(8)
30700 .sr(1)
30701 .m(m)
30702 .n(n)
30703 .k(k)
30704 .iterations(1)
30705 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30706 }
30707 }
30708 }
30709 }
30710
30711 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4) {
30712 TEST_REQUIRES_X86_XOP;
30713 for (uint32_t n = 5; n < 8; n++) {
30714 for (size_t k = 1; k <= 40; k += 9) {
30715 GemmMicrokernelTester()
30716 .mr(3)
30717 .nr(4)
30718 .kr(8)
30719 .sr(1)
30720 .m(3)
30721 .n(4)
30722 .k(k)
30723 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30724 }
30725 }
30726 }
30727
30728 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_strided_cn) {
30729 TEST_REQUIRES_X86_XOP;
30730 for (uint32_t n = 5; n < 8; n++) {
30731 for (size_t k = 1; k <= 40; k += 9) {
30732 GemmMicrokernelTester()
30733 .mr(3)
30734 .nr(4)
30735 .kr(8)
30736 .sr(1)
30737 .m(3)
30738 .n(4)
30739 .k(k)
30740 .cn_stride(7)
30741 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30742 }
30743 }
30744 }
30745
30746 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_subtile) {
30747 TEST_REQUIRES_X86_XOP;
30748 for (uint32_t n = 5; n < 8; n++) {
30749 for (size_t k = 1; k <= 40; k += 9) {
30750 for (uint32_t m = 1; m <= 3; m++) {
30751 GemmMicrokernelTester()
30752 .mr(3)
30753 .nr(4)
30754 .kr(8)
30755 .sr(1)
30756 .m(m)
30757 .n(n)
30758 .k(k)
30759 .iterations(1)
30760 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30761 }
30762 }
30763 }
30764 }
30765
30766 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4) {
30767 TEST_REQUIRES_X86_XOP;
30768 for (uint32_t n = 8; n <= 12; n += 4) {
30769 for (size_t k = 1; k <= 40; k += 9) {
30770 GemmMicrokernelTester()
30771 .mr(3)
30772 .nr(4)
30773 .kr(8)
30774 .sr(1)
30775 .m(3)
30776 .n(4)
30777 .k(k)
30778 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30779 }
30780 }
30781 }
30782
30783 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_strided_cn) {
30784 TEST_REQUIRES_X86_XOP;
30785 for (uint32_t n = 8; n <= 12; n += 4) {
30786 for (size_t k = 1; k <= 40; k += 9) {
30787 GemmMicrokernelTester()
30788 .mr(3)
30789 .nr(4)
30790 .kr(8)
30791 .sr(1)
30792 .m(3)
30793 .n(n)
30794 .k(k)
30795 .cn_stride(7)
30796 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30797 }
30798 }
30799 }
30800
30801 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_subtile) {
30802 TEST_REQUIRES_X86_XOP;
30803 for (uint32_t n = 8; n <= 12; n += 4) {
30804 for (size_t k = 1; k <= 40; k += 9) {
30805 for (uint32_t m = 1; m <= 3; m++) {
30806 GemmMicrokernelTester()
30807 .mr(3)
30808 .nr(4)
30809 .kr(8)
30810 .sr(1)
30811 .m(m)
30812 .n(n)
30813 .k(k)
30814 .iterations(1)
30815 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30816 }
30817 }
30818 }
30819 }
30820
30821 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, small_kernel) {
30822 TEST_REQUIRES_X86_XOP;
30823 for (size_t k = 1; k <= 40; k += 9) {
30824 GemmMicrokernelTester()
30825 .mr(3)
30826 .nr(4)
30827 .kr(8)
30828 .sr(1)
30829 .m(3)
30830 .n(4)
30831 .k(k)
30832 .ks(3)
30833 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30834 }
30835 }
30836
30837 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, small_kernel_subtile) {
30838 TEST_REQUIRES_X86_XOP;
30839 for (size_t k = 1; k <= 40; k += 9) {
30840 for (uint32_t m = 1; m <= 3; m++) {
30841 for (uint32_t n = 1; n <= 4; n++) {
30842 GemmMicrokernelTester()
30843 .mr(3)
30844 .nr(4)
30845 .kr(8)
30846 .sr(1)
30847 .m(m)
30848 .n(n)
30849 .k(k)
30850 .ks(3)
30851 .iterations(1)
30852 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30853 }
30854 }
30855 }
30856 }
30857
30858 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_small_kernel) {
30859 TEST_REQUIRES_X86_XOP;
30860 for (uint32_t n = 5; n < 8; n++) {
30861 for (size_t k = 1; k <= 40; k += 9) {
30862 GemmMicrokernelTester()
30863 .mr(3)
30864 .nr(4)
30865 .kr(8)
30866 .sr(1)
30867 .m(3)
30868 .n(4)
30869 .k(k)
30870 .ks(3)
30871 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30872 }
30873 }
30874 }
30875
30876 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_small_kernel) {
30877 TEST_REQUIRES_X86_XOP;
30878 for (uint32_t n = 8; n <= 12; n += 4) {
30879 for (size_t k = 1; k <= 40; k += 9) {
30880 GemmMicrokernelTester()
30881 .mr(3)
30882 .nr(4)
30883 .kr(8)
30884 .sr(1)
30885 .m(3)
30886 .n(4)
30887 .k(k)
30888 .ks(3)
30889 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30890 }
30891 }
30892 }
30893
30894 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cm_subtile) {
30895 TEST_REQUIRES_X86_XOP;
30896 for (size_t k = 1; k <= 40; k += 9) {
30897 for (uint32_t m = 1; m <= 3; m++) {
30898 for (uint32_t n = 1; n <= 4; n++) {
30899 GemmMicrokernelTester()
30900 .mr(3)
30901 .nr(4)
30902 .kr(8)
30903 .sr(1)
30904 .m(m)
30905 .n(n)
30906 .k(k)
30907 .cm_stride(7)
30908 .iterations(1)
30909 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30910 }
30911 }
30912 }
30913 }
30914
30915 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, a_offset) {
30916 TEST_REQUIRES_X86_XOP;
30917 for (size_t k = 1; k <= 40; k += 9) {
30918 GemmMicrokernelTester()
30919 .mr(3)
30920 .nr(4)
30921 .kr(8)
30922 .sr(1)
30923 .m(3)
30924 .n(4)
30925 .k(k)
30926 .ks(3)
30927 .a_offset(127)
30928 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30929 }
30930 }
30931
30932 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, zero) {
30933 TEST_REQUIRES_X86_XOP;
30934 for (uint32_t mz = 0; mz < 3; mz++) {
30935 for (size_t k = 1; k <= 40; k += 9) {
30936 GemmMicrokernelTester()
30937 .mr(3)
30938 .nr(4)
30939 .kr(8)
30940 .sr(1)
30941 .m(3)
30942 .n(4)
30943 .k(k)
30944 .ks(3)
30945 .a_offset(127)
30946 .zero_index(mz)
30947 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30948 }
30949 }
30950 }
30951
30952 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, qmin) {
30953 TEST_REQUIRES_X86_XOP;
30954 GemmMicrokernelTester()
30955 .mr(3)
30956 .nr(4)
30957 .kr(8)
30958 .sr(1)
30959 .m(3)
30960 .n(4)
30961 .k(8)
30962 .qmin(128)
30963 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30964 }
30965
30966 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, qmax) {
30967 TEST_REQUIRES_X86_XOP;
30968 GemmMicrokernelTester()
30969 .mr(3)
30970 .nr(4)
30971 .kr(8)
30972 .sr(1)
30973 .m(3)
30974 .n(4)
30975 .k(8)
30976 .qmax(128)
30977 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30978 }
30979
30980 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cm) {
30981 TEST_REQUIRES_X86_XOP;
30982 GemmMicrokernelTester()
30983 .mr(3)
30984 .nr(4)
30985 .kr(8)
30986 .sr(1)
30987 .m(3)
30988 .n(4)
30989 .k(8)
30990 .cm_stride(7)
30991 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
30992 }
30993
30994 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, no_a_zero_point) {
30995 TEST_REQUIRES_X86_XOP;
30996 for (size_t k = 1; k <= 40; k += 9) {
30997 GemmMicrokernelTester()
30998 .mr(3)
30999 .nr(4)
31000 .kr(8)
31001 .sr(1)
31002 .m(3)
31003 .n(4)
31004 .k(k)
31005 .a_zero_point(0)
31006 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31007 }
31008 }
31009
31010 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, no_b_zero_point) {
31011 TEST_REQUIRES_X86_XOP;
31012 for (size_t k = 1; k <= 40; k += 9) {
31013 GemmMicrokernelTester()
31014 .mr(3)
31015 .nr(4)
31016 .kr(8)
31017 .sr(1)
31018 .m(3)
31019 .n(4)
31020 .k(k)
31021 .b_zero_point(0)
31022 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31023 }
31024 }
31025
31026 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, no_zero_point) {
31027 TEST_REQUIRES_X86_XOP;
31028 for (size_t k = 1; k <= 40; k += 9) {
31029 GemmMicrokernelTester()
31030 .mr(3)
31031 .nr(4)
31032 .kr(8)
31033 .sr(1)
31034 .m(3)
31035 .n(4)
31036 .k(k)
31037 .a_zero_point(0)
31038 .b_zero_point(0)
31039 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31040 }
31041 }
31042#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan902ef7f2021-07-02 16:11:06 -070031043
31044
31045#if XNN_ARCH_X86 || XNN_ARCH_X86_64
31046 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8) {
31047 TEST_REQUIRES_X86_AVX2;
31048 GemmMicrokernelTester()
31049 .mr(1)
31050 .nr(8)
31051 .kr(8)
31052 .sr(1)
31053 .m(1)
31054 .n(8)
31055 .k(8)
31056 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31057 }
31058
31059 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cn) {
31060 TEST_REQUIRES_X86_AVX2;
31061 GemmMicrokernelTester()
31062 .mr(1)
31063 .nr(8)
31064 .kr(8)
31065 .sr(1)
31066 .m(1)
31067 .n(8)
31068 .k(8)
31069 .cn_stride(11)
31070 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31071 }
31072
31073 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile) {
31074 TEST_REQUIRES_X86_AVX2;
31075 for (uint32_t m = 1; m <= 1; m++) {
31076 for (uint32_t n = 1; n <= 8; n++) {
31077 GemmMicrokernelTester()
31078 .mr(1)
31079 .nr(8)
31080 .kr(8)
31081 .sr(1)
31082 .m(m)
31083 .n(n)
31084 .k(8)
31085 .iterations(1)
31086 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31087 }
31088 }
31089 }
31090
31091 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_m) {
31092 TEST_REQUIRES_X86_AVX2;
31093 for (uint32_t m = 1; m <= 1; m++) {
31094 GemmMicrokernelTester()
31095 .mr(1)
31096 .nr(8)
31097 .kr(8)
31098 .sr(1)
31099 .m(m)
31100 .n(8)
31101 .k(8)
31102 .iterations(1)
31103 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31104 }
31105 }
31106
31107 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_n) {
31108 TEST_REQUIRES_X86_AVX2;
31109 for (uint32_t n = 1; n <= 8; n++) {
31110 GemmMicrokernelTester()
31111 .mr(1)
31112 .nr(8)
31113 .kr(8)
31114 .sr(1)
31115 .m(1)
31116 .n(n)
31117 .k(8)
31118 .iterations(1)
31119 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31120 }
31121 }
31122
31123 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8) {
31124 TEST_REQUIRES_X86_AVX2;
31125 for (size_t k = 1; k < 8; k++) {
31126 GemmMicrokernelTester()
31127 .mr(1)
31128 .nr(8)
31129 .kr(8)
31130 .sr(1)
31131 .m(1)
31132 .n(8)
31133 .k(k)
31134 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31135 }
31136 }
31137
31138 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8_subtile) {
31139 TEST_REQUIRES_X86_AVX2;
31140 for (size_t k = 1; k < 8; k++) {
31141 for (uint32_t m = 1; m <= 1; m++) {
31142 for (uint32_t n = 1; n <= 8; n++) {
31143 GemmMicrokernelTester()
31144 .mr(1)
31145 .nr(8)
31146 .kr(8)
31147 .sr(1)
31148 .m(m)
31149 .n(n)
31150 .k(k)
31151 .iterations(1)
31152 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31153 }
31154 }
31155 }
31156 }
31157
31158 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8) {
31159 TEST_REQUIRES_X86_AVX2;
31160 for (size_t k = 9; k < 16; k++) {
31161 GemmMicrokernelTester()
31162 .mr(1)
31163 .nr(8)
31164 .kr(8)
31165 .sr(1)
31166 .m(1)
31167 .n(8)
31168 .k(k)
31169 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31170 }
31171 }
31172
31173 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8_subtile) {
31174 TEST_REQUIRES_X86_AVX2;
31175 for (size_t k = 9; k < 16; k++) {
31176 for (uint32_t m = 1; m <= 1; m++) {
31177 for (uint32_t n = 1; n <= 8; n++) {
31178 GemmMicrokernelTester()
31179 .mr(1)
31180 .nr(8)
31181 .kr(8)
31182 .sr(1)
31183 .m(m)
31184 .n(n)
31185 .k(k)
31186 .iterations(1)
31187 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31188 }
31189 }
31190 }
31191 }
31192
31193 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8) {
31194 TEST_REQUIRES_X86_AVX2;
31195 for (size_t k = 16; k <= 80; k += 8) {
31196 GemmMicrokernelTester()
31197 .mr(1)
31198 .nr(8)
31199 .kr(8)
31200 .sr(1)
31201 .m(1)
31202 .n(8)
31203 .k(k)
31204 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31205 }
31206 }
31207
31208 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8_subtile) {
31209 TEST_REQUIRES_X86_AVX2;
31210 for (size_t k = 16; k <= 80; k += 8) {
31211 for (uint32_t m = 1; m <= 1; m++) {
31212 for (uint32_t n = 1; n <= 8; n++) {
31213 GemmMicrokernelTester()
31214 .mr(1)
31215 .nr(8)
31216 .kr(8)
31217 .sr(1)
31218 .m(m)
31219 .n(n)
31220 .k(k)
31221 .iterations(1)
31222 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31223 }
31224 }
31225 }
31226 }
31227
31228 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8) {
31229 TEST_REQUIRES_X86_AVX2;
31230 for (uint32_t n = 9; n < 16; n++) {
31231 for (size_t k = 1; k <= 40; k += 9) {
31232 GemmMicrokernelTester()
31233 .mr(1)
31234 .nr(8)
31235 .kr(8)
31236 .sr(1)
31237 .m(1)
31238 .n(8)
31239 .k(k)
31240 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31241 }
31242 }
31243 }
31244
31245 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_strided_cn) {
31246 TEST_REQUIRES_X86_AVX2;
31247 for (uint32_t n = 9; n < 16; n++) {
31248 for (size_t k = 1; k <= 40; k += 9) {
31249 GemmMicrokernelTester()
31250 .mr(1)
31251 .nr(8)
31252 .kr(8)
31253 .sr(1)
31254 .m(1)
31255 .n(8)
31256 .k(k)
31257 .cn_stride(11)
31258 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31259 }
31260 }
31261 }
31262
31263 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_subtile) {
31264 TEST_REQUIRES_X86_AVX2;
31265 for (uint32_t n = 9; n < 16; n++) {
31266 for (size_t k = 1; k <= 40; k += 9) {
31267 for (uint32_t m = 1; m <= 1; m++) {
31268 GemmMicrokernelTester()
31269 .mr(1)
31270 .nr(8)
31271 .kr(8)
31272 .sr(1)
31273 .m(m)
31274 .n(n)
31275 .k(k)
31276 .iterations(1)
31277 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31278 }
31279 }
31280 }
31281 }
31282
31283 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8) {
31284 TEST_REQUIRES_X86_AVX2;
31285 for (uint32_t n = 16; n <= 24; n += 8) {
31286 for (size_t k = 1; k <= 40; k += 9) {
31287 GemmMicrokernelTester()
31288 .mr(1)
31289 .nr(8)
31290 .kr(8)
31291 .sr(1)
31292 .m(1)
31293 .n(8)
31294 .k(k)
31295 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31296 }
31297 }
31298 }
31299
31300 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_strided_cn) {
31301 TEST_REQUIRES_X86_AVX2;
31302 for (uint32_t n = 16; n <= 24; n += 8) {
31303 for (size_t k = 1; k <= 40; k += 9) {
31304 GemmMicrokernelTester()
31305 .mr(1)
31306 .nr(8)
31307 .kr(8)
31308 .sr(1)
31309 .m(1)
31310 .n(n)
31311 .k(k)
31312 .cn_stride(11)
31313 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31314 }
31315 }
31316 }
31317
31318 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_subtile) {
31319 TEST_REQUIRES_X86_AVX2;
31320 for (uint32_t n = 16; n <= 24; n += 8) {
31321 for (size_t k = 1; k <= 40; k += 9) {
31322 for (uint32_t m = 1; m <= 1; m++) {
31323 GemmMicrokernelTester()
31324 .mr(1)
31325 .nr(8)
31326 .kr(8)
31327 .sr(1)
31328 .m(m)
31329 .n(n)
31330 .k(k)
31331 .iterations(1)
31332 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31333 }
31334 }
31335 }
31336 }
31337
31338 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, small_kernel) {
31339 TEST_REQUIRES_X86_AVX2;
31340 for (size_t k = 1; k <= 40; k += 9) {
31341 GemmMicrokernelTester()
31342 .mr(1)
31343 .nr(8)
31344 .kr(8)
31345 .sr(1)
31346 .m(1)
31347 .n(8)
31348 .k(k)
31349 .ks(3)
31350 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31351 }
31352 }
31353
31354 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, small_kernel_subtile) {
31355 TEST_REQUIRES_X86_AVX2;
31356 for (size_t k = 1; k <= 40; k += 9) {
31357 for (uint32_t m = 1; m <= 1; m++) {
31358 for (uint32_t n = 1; n <= 8; n++) {
31359 GemmMicrokernelTester()
31360 .mr(1)
31361 .nr(8)
31362 .kr(8)
31363 .sr(1)
31364 .m(m)
31365 .n(n)
31366 .k(k)
31367 .ks(3)
31368 .iterations(1)
31369 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31370 }
31371 }
31372 }
31373 }
31374
31375 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_small_kernel) {
31376 TEST_REQUIRES_X86_AVX2;
31377 for (uint32_t n = 9; n < 16; n++) {
31378 for (size_t k = 1; k <= 40; k += 9) {
31379 GemmMicrokernelTester()
31380 .mr(1)
31381 .nr(8)
31382 .kr(8)
31383 .sr(1)
31384 .m(1)
31385 .n(8)
31386 .k(k)
31387 .ks(3)
31388 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31389 }
31390 }
31391 }
31392
31393 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_small_kernel) {
31394 TEST_REQUIRES_X86_AVX2;
31395 for (uint32_t n = 16; n <= 24; n += 8) {
31396 for (size_t k = 1; k <= 40; k += 9) {
31397 GemmMicrokernelTester()
31398 .mr(1)
31399 .nr(8)
31400 .kr(8)
31401 .sr(1)
31402 .m(1)
31403 .n(8)
31404 .k(k)
31405 .ks(3)
31406 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31407 }
31408 }
31409 }
31410
31411 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cm_subtile) {
31412 TEST_REQUIRES_X86_AVX2;
31413 for (size_t k = 1; k <= 40; k += 9) {
31414 for (uint32_t m = 1; m <= 1; m++) {
31415 for (uint32_t n = 1; n <= 8; n++) {
31416 GemmMicrokernelTester()
31417 .mr(1)
31418 .nr(8)
31419 .kr(8)
31420 .sr(1)
31421 .m(m)
31422 .n(n)
31423 .k(k)
31424 .cm_stride(11)
31425 .iterations(1)
31426 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31427 }
31428 }
31429 }
31430 }
31431
31432 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, a_offset) {
31433 TEST_REQUIRES_X86_AVX2;
31434 for (size_t k = 1; k <= 40; k += 9) {
31435 GemmMicrokernelTester()
31436 .mr(1)
31437 .nr(8)
31438 .kr(8)
31439 .sr(1)
31440 .m(1)
31441 .n(8)
31442 .k(k)
31443 .ks(3)
31444 .a_offset(43)
31445 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31446 }
31447 }
31448
31449 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, zero) {
31450 TEST_REQUIRES_X86_AVX2;
31451 for (uint32_t mz = 0; mz < 1; mz++) {
31452 for (size_t k = 1; k <= 40; k += 9) {
31453 GemmMicrokernelTester()
31454 .mr(1)
31455 .nr(8)
31456 .kr(8)
31457 .sr(1)
31458 .m(1)
31459 .n(8)
31460 .k(k)
31461 .ks(3)
31462 .a_offset(43)
31463 .zero_index(mz)
31464 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31465 }
31466 }
31467 }
31468
31469 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, qmin) {
31470 TEST_REQUIRES_X86_AVX2;
31471 GemmMicrokernelTester()
31472 .mr(1)
31473 .nr(8)
31474 .kr(8)
31475 .sr(1)
31476 .m(1)
31477 .n(8)
31478 .k(8)
31479 .qmin(128)
31480 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31481 }
31482
31483 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, qmax) {
31484 TEST_REQUIRES_X86_AVX2;
31485 GemmMicrokernelTester()
31486 .mr(1)
31487 .nr(8)
31488 .kr(8)
31489 .sr(1)
31490 .m(1)
31491 .n(8)
31492 .k(8)
31493 .qmax(128)
31494 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31495 }
31496
31497 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cm) {
31498 TEST_REQUIRES_X86_AVX2;
31499 GemmMicrokernelTester()
31500 .mr(1)
31501 .nr(8)
31502 .kr(8)
31503 .sr(1)
31504 .m(1)
31505 .n(8)
31506 .k(8)
31507 .cm_stride(11)
31508 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31509 }
31510
31511 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, no_a_zero_point) {
31512 TEST_REQUIRES_X86_AVX2;
31513 for (size_t k = 1; k <= 40; k += 9) {
31514 GemmMicrokernelTester()
31515 .mr(1)
31516 .nr(8)
31517 .kr(8)
31518 .sr(1)
31519 .m(1)
31520 .n(8)
31521 .k(k)
31522 .a_zero_point(0)
31523 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31524 }
31525 }
31526
31527 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, no_b_zero_point) {
31528 TEST_REQUIRES_X86_AVX2;
31529 for (size_t k = 1; k <= 40; k += 9) {
31530 GemmMicrokernelTester()
31531 .mr(1)
31532 .nr(8)
31533 .kr(8)
31534 .sr(1)
31535 .m(1)
31536 .n(8)
31537 .k(k)
31538 .b_zero_point(0)
31539 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31540 }
31541 }
31542
31543 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, no_zero_point) {
31544 TEST_REQUIRES_X86_AVX2;
31545 for (size_t k = 1; k <= 40; k += 9) {
31546 GemmMicrokernelTester()
31547 .mr(1)
31548 .nr(8)
31549 .kr(8)
31550 .sr(1)
31551 .m(1)
31552 .n(8)
31553 .k(k)
31554 .a_zero_point(0)
31555 .b_zero_point(0)
31556 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31557 }
31558 }
31559#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
31560
31561
31562#if XNN_ARCH_X86 || XNN_ARCH_X86_64
31563 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8) {
31564 TEST_REQUIRES_X86_AVX2;
31565 GemmMicrokernelTester()
31566 .mr(2)
31567 .nr(8)
31568 .kr(8)
31569 .sr(1)
31570 .m(2)
31571 .n(8)
31572 .k(8)
31573 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31574 }
31575
31576 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, strided_cn) {
31577 TEST_REQUIRES_X86_AVX2;
31578 GemmMicrokernelTester()
31579 .mr(2)
31580 .nr(8)
31581 .kr(8)
31582 .sr(1)
31583 .m(2)
31584 .n(8)
31585 .k(8)
31586 .cn_stride(11)
31587 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31588 }
31589
31590 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile) {
31591 TEST_REQUIRES_X86_AVX2;
31592 for (uint32_t m = 1; m <= 2; m++) {
31593 for (uint32_t n = 1; n <= 8; n++) {
31594 GemmMicrokernelTester()
31595 .mr(2)
31596 .nr(8)
31597 .kr(8)
31598 .sr(1)
31599 .m(m)
31600 .n(n)
31601 .k(8)
31602 .iterations(1)
31603 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31604 }
31605 }
31606 }
31607
31608 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_m) {
31609 TEST_REQUIRES_X86_AVX2;
31610 for (uint32_t m = 1; m <= 2; m++) {
31611 GemmMicrokernelTester()
31612 .mr(2)
31613 .nr(8)
31614 .kr(8)
31615 .sr(1)
31616 .m(m)
31617 .n(8)
31618 .k(8)
31619 .iterations(1)
31620 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31621 }
31622 }
31623
31624 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_n) {
31625 TEST_REQUIRES_X86_AVX2;
31626 for (uint32_t n = 1; n <= 8; n++) {
31627 GemmMicrokernelTester()
31628 .mr(2)
31629 .nr(8)
31630 .kr(8)
31631 .sr(1)
31632 .m(2)
31633 .n(n)
31634 .k(8)
31635 .iterations(1)
31636 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31637 }
31638 }
31639
31640 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8) {
31641 TEST_REQUIRES_X86_AVX2;
31642 for (size_t k = 1; k < 8; k++) {
31643 GemmMicrokernelTester()
31644 .mr(2)
31645 .nr(8)
31646 .kr(8)
31647 .sr(1)
31648 .m(2)
31649 .n(8)
31650 .k(k)
31651 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31652 }
31653 }
31654
31655 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8_subtile) {
31656 TEST_REQUIRES_X86_AVX2;
31657 for (size_t k = 1; k < 8; k++) {
31658 for (uint32_t m = 1; m <= 2; m++) {
31659 for (uint32_t n = 1; n <= 8; n++) {
31660 GemmMicrokernelTester()
31661 .mr(2)
31662 .nr(8)
31663 .kr(8)
31664 .sr(1)
31665 .m(m)
31666 .n(n)
31667 .k(k)
31668 .iterations(1)
31669 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31670 }
31671 }
31672 }
31673 }
31674
31675 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8) {
31676 TEST_REQUIRES_X86_AVX2;
31677 for (size_t k = 9; k < 16; k++) {
31678 GemmMicrokernelTester()
31679 .mr(2)
31680 .nr(8)
31681 .kr(8)
31682 .sr(1)
31683 .m(2)
31684 .n(8)
31685 .k(k)
31686 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31687 }
31688 }
31689
31690 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8_subtile) {
31691 TEST_REQUIRES_X86_AVX2;
31692 for (size_t k = 9; k < 16; k++) {
31693 for (uint32_t m = 1; m <= 2; m++) {
31694 for (uint32_t n = 1; n <= 8; n++) {
31695 GemmMicrokernelTester()
31696 .mr(2)
31697 .nr(8)
31698 .kr(8)
31699 .sr(1)
31700 .m(m)
31701 .n(n)
31702 .k(k)
31703 .iterations(1)
31704 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31705 }
31706 }
31707 }
31708 }
31709
31710 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8) {
31711 TEST_REQUIRES_X86_AVX2;
31712 for (size_t k = 16; k <= 80; k += 8) {
31713 GemmMicrokernelTester()
31714 .mr(2)
31715 .nr(8)
31716 .kr(8)
31717 .sr(1)
31718 .m(2)
31719 .n(8)
31720 .k(k)
31721 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31722 }
31723 }
31724
31725 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8_subtile) {
31726 TEST_REQUIRES_X86_AVX2;
31727 for (size_t k = 16; k <= 80; k += 8) {
31728 for (uint32_t m = 1; m <= 2; m++) {
31729 for (uint32_t n = 1; n <= 8; n++) {
31730 GemmMicrokernelTester()
31731 .mr(2)
31732 .nr(8)
31733 .kr(8)
31734 .sr(1)
31735 .m(m)
31736 .n(n)
31737 .k(k)
31738 .iterations(1)
31739 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31740 }
31741 }
31742 }
31743 }
31744
31745 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8) {
31746 TEST_REQUIRES_X86_AVX2;
31747 for (uint32_t n = 9; n < 16; n++) {
31748 for (size_t k = 1; k <= 40; k += 9) {
31749 GemmMicrokernelTester()
31750 .mr(2)
31751 .nr(8)
31752 .kr(8)
31753 .sr(1)
31754 .m(2)
31755 .n(8)
31756 .k(k)
31757 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31758 }
31759 }
31760 }
31761
31762 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_strided_cn) {
31763 TEST_REQUIRES_X86_AVX2;
31764 for (uint32_t n = 9; n < 16; n++) {
31765 for (size_t k = 1; k <= 40; k += 9) {
31766 GemmMicrokernelTester()
31767 .mr(2)
31768 .nr(8)
31769 .kr(8)
31770 .sr(1)
31771 .m(2)
31772 .n(8)
31773 .k(k)
31774 .cn_stride(11)
31775 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31776 }
31777 }
31778 }
31779
31780 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_subtile) {
31781 TEST_REQUIRES_X86_AVX2;
31782 for (uint32_t n = 9; n < 16; n++) {
31783 for (size_t k = 1; k <= 40; k += 9) {
31784 for (uint32_t m = 1; m <= 2; m++) {
31785 GemmMicrokernelTester()
31786 .mr(2)
31787 .nr(8)
31788 .kr(8)
31789 .sr(1)
31790 .m(m)
31791 .n(n)
31792 .k(k)
31793 .iterations(1)
31794 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31795 }
31796 }
31797 }
31798 }
31799
31800 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8) {
31801 TEST_REQUIRES_X86_AVX2;
31802 for (uint32_t n = 16; n <= 24; n += 8) {
31803 for (size_t k = 1; k <= 40; k += 9) {
31804 GemmMicrokernelTester()
31805 .mr(2)
31806 .nr(8)
31807 .kr(8)
31808 .sr(1)
31809 .m(2)
31810 .n(8)
31811 .k(k)
31812 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31813 }
31814 }
31815 }
31816
31817 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_strided_cn) {
31818 TEST_REQUIRES_X86_AVX2;
31819 for (uint32_t n = 16; n <= 24; n += 8) {
31820 for (size_t k = 1; k <= 40; k += 9) {
31821 GemmMicrokernelTester()
31822 .mr(2)
31823 .nr(8)
31824 .kr(8)
31825 .sr(1)
31826 .m(2)
31827 .n(n)
31828 .k(k)
31829 .cn_stride(11)
31830 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31831 }
31832 }
31833 }
31834
31835 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_subtile) {
31836 TEST_REQUIRES_X86_AVX2;
31837 for (uint32_t n = 16; n <= 24; n += 8) {
31838 for (size_t k = 1; k <= 40; k += 9) {
31839 for (uint32_t m = 1; m <= 2; m++) {
31840 GemmMicrokernelTester()
31841 .mr(2)
31842 .nr(8)
31843 .kr(8)
31844 .sr(1)
31845 .m(m)
31846 .n(n)
31847 .k(k)
31848 .iterations(1)
31849 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31850 }
31851 }
31852 }
31853 }
31854
31855 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, small_kernel) {
31856 TEST_REQUIRES_X86_AVX2;
31857 for (size_t k = 1; k <= 40; k += 9) {
31858 GemmMicrokernelTester()
31859 .mr(2)
31860 .nr(8)
31861 .kr(8)
31862 .sr(1)
31863 .m(2)
31864 .n(8)
31865 .k(k)
31866 .ks(3)
31867 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31868 }
31869 }
31870
31871 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, small_kernel_subtile) {
31872 TEST_REQUIRES_X86_AVX2;
31873 for (size_t k = 1; k <= 40; k += 9) {
31874 for (uint32_t m = 1; m <= 2; m++) {
31875 for (uint32_t n = 1; n <= 8; n++) {
31876 GemmMicrokernelTester()
31877 .mr(2)
31878 .nr(8)
31879 .kr(8)
31880 .sr(1)
31881 .m(m)
31882 .n(n)
31883 .k(k)
31884 .ks(3)
31885 .iterations(1)
31886 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31887 }
31888 }
31889 }
31890 }
31891
31892 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_small_kernel) {
31893 TEST_REQUIRES_X86_AVX2;
31894 for (uint32_t n = 9; n < 16; n++) {
31895 for (size_t k = 1; k <= 40; k += 9) {
31896 GemmMicrokernelTester()
31897 .mr(2)
31898 .nr(8)
31899 .kr(8)
31900 .sr(1)
31901 .m(2)
31902 .n(8)
31903 .k(k)
31904 .ks(3)
31905 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31906 }
31907 }
31908 }
31909
31910 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_small_kernel) {
31911 TEST_REQUIRES_X86_AVX2;
31912 for (uint32_t n = 16; n <= 24; n += 8) {
31913 for (size_t k = 1; k <= 40; k += 9) {
31914 GemmMicrokernelTester()
31915 .mr(2)
31916 .nr(8)
31917 .kr(8)
31918 .sr(1)
31919 .m(2)
31920 .n(8)
31921 .k(k)
31922 .ks(3)
31923 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31924 }
31925 }
31926 }
31927
31928 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, strided_cm_subtile) {
31929 TEST_REQUIRES_X86_AVX2;
31930 for (size_t k = 1; k <= 40; k += 9) {
31931 for (uint32_t m = 1; m <= 2; m++) {
31932 for (uint32_t n = 1; n <= 8; n++) {
31933 GemmMicrokernelTester()
31934 .mr(2)
31935 .nr(8)
31936 .kr(8)
31937 .sr(1)
31938 .m(m)
31939 .n(n)
31940 .k(k)
31941 .cm_stride(11)
31942 .iterations(1)
31943 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31944 }
31945 }
31946 }
31947 }
31948
31949 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, a_offset) {
31950 TEST_REQUIRES_X86_AVX2;
31951 for (size_t k = 1; k <= 40; k += 9) {
31952 GemmMicrokernelTester()
31953 .mr(2)
31954 .nr(8)
31955 .kr(8)
31956 .sr(1)
31957 .m(2)
31958 .n(8)
31959 .k(k)
31960 .ks(3)
31961 .a_offset(83)
31962 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31963 }
31964 }
31965
31966 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, zero) {
31967 TEST_REQUIRES_X86_AVX2;
31968 for (uint32_t mz = 0; mz < 2; mz++) {
31969 for (size_t k = 1; k <= 40; k += 9) {
31970 GemmMicrokernelTester()
31971 .mr(2)
31972 .nr(8)
31973 .kr(8)
31974 .sr(1)
31975 .m(2)
31976 .n(8)
31977 .k(k)
31978 .ks(3)
31979 .a_offset(83)
31980 .zero_index(mz)
31981 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31982 }
31983 }
31984 }
31985
31986 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, qmin) {
31987 TEST_REQUIRES_X86_AVX2;
31988 GemmMicrokernelTester()
31989 .mr(2)
31990 .nr(8)
31991 .kr(8)
31992 .sr(1)
31993 .m(2)
31994 .n(8)
31995 .k(8)
31996 .qmin(128)
31997 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
31998 }
31999
32000 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, qmax) {
32001 TEST_REQUIRES_X86_AVX2;
32002 GemmMicrokernelTester()
32003 .mr(2)
32004 .nr(8)
32005 .kr(8)
32006 .sr(1)
32007 .m(2)
32008 .n(8)
32009 .k(8)
32010 .qmax(128)
32011 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32012 }
32013
32014 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, strided_cm) {
32015 TEST_REQUIRES_X86_AVX2;
32016 GemmMicrokernelTester()
32017 .mr(2)
32018 .nr(8)
32019 .kr(8)
32020 .sr(1)
32021 .m(2)
32022 .n(8)
32023 .k(8)
32024 .cm_stride(11)
32025 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32026 }
32027
32028 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, no_a_zero_point) {
32029 TEST_REQUIRES_X86_AVX2;
32030 for (size_t k = 1; k <= 40; k += 9) {
32031 GemmMicrokernelTester()
32032 .mr(2)
32033 .nr(8)
32034 .kr(8)
32035 .sr(1)
32036 .m(2)
32037 .n(8)
32038 .k(k)
32039 .a_zero_point(0)
32040 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32041 }
32042 }
32043
32044 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, no_b_zero_point) {
32045 TEST_REQUIRES_X86_AVX2;
32046 for (size_t k = 1; k <= 40; k += 9) {
32047 GemmMicrokernelTester()
32048 .mr(2)
32049 .nr(8)
32050 .kr(8)
32051 .sr(1)
32052 .m(2)
32053 .n(8)
32054 .k(k)
32055 .b_zero_point(0)
32056 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32057 }
32058 }
32059
32060 TEST(QU8_IGEMM_MINMAX_FP32_2X8C8__AVX2, no_zero_point) {
32061 TEST_REQUIRES_X86_AVX2;
32062 for (size_t k = 1; k <= 40; k += 9) {
32063 GemmMicrokernelTester()
32064 .mr(2)
32065 .nr(8)
32066 .kr(8)
32067 .sr(1)
32068 .m(2)
32069 .n(8)
32070 .k(k)
32071 .a_zero_point(0)
32072 .b_zero_point(0)
32073 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32074 }
32075 }
32076#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
32077
32078
32079#if XNN_ARCH_X86 || XNN_ARCH_X86_64
32080 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8) {
32081 TEST_REQUIRES_X86_AVX2;
32082 GemmMicrokernelTester()
32083 .mr(3)
32084 .nr(8)
32085 .kr(8)
32086 .sr(1)
32087 .m(3)
32088 .n(8)
32089 .k(8)
32090 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32091 }
32092
32093 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, strided_cn) {
32094 TEST_REQUIRES_X86_AVX2;
32095 GemmMicrokernelTester()
32096 .mr(3)
32097 .nr(8)
32098 .kr(8)
32099 .sr(1)
32100 .m(3)
32101 .n(8)
32102 .k(8)
32103 .cn_stride(11)
32104 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32105 }
32106
32107 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile) {
32108 TEST_REQUIRES_X86_AVX2;
32109 for (uint32_t m = 1; m <= 3; m++) {
32110 for (uint32_t n = 1; n <= 8; n++) {
32111 GemmMicrokernelTester()
32112 .mr(3)
32113 .nr(8)
32114 .kr(8)
32115 .sr(1)
32116 .m(m)
32117 .n(n)
32118 .k(8)
32119 .iterations(1)
32120 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32121 }
32122 }
32123 }
32124
32125 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_m) {
32126 TEST_REQUIRES_X86_AVX2;
32127 for (uint32_t m = 1; m <= 3; m++) {
32128 GemmMicrokernelTester()
32129 .mr(3)
32130 .nr(8)
32131 .kr(8)
32132 .sr(1)
32133 .m(m)
32134 .n(8)
32135 .k(8)
32136 .iterations(1)
32137 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32138 }
32139 }
32140
32141 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_n) {
32142 TEST_REQUIRES_X86_AVX2;
32143 for (uint32_t n = 1; n <= 8; n++) {
32144 GemmMicrokernelTester()
32145 .mr(3)
32146 .nr(8)
32147 .kr(8)
32148 .sr(1)
32149 .m(3)
32150 .n(n)
32151 .k(8)
32152 .iterations(1)
32153 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32154 }
32155 }
32156
32157 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8) {
32158 TEST_REQUIRES_X86_AVX2;
32159 for (size_t k = 1; k < 8; k++) {
32160 GemmMicrokernelTester()
32161 .mr(3)
32162 .nr(8)
32163 .kr(8)
32164 .sr(1)
32165 .m(3)
32166 .n(8)
32167 .k(k)
32168 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32169 }
32170 }
32171
32172 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8_subtile) {
32173 TEST_REQUIRES_X86_AVX2;
32174 for (size_t k = 1; k < 8; k++) {
32175 for (uint32_t m = 1; m <= 3; m++) {
32176 for (uint32_t n = 1; n <= 8; n++) {
32177 GemmMicrokernelTester()
32178 .mr(3)
32179 .nr(8)
32180 .kr(8)
32181 .sr(1)
32182 .m(m)
32183 .n(n)
32184 .k(k)
32185 .iterations(1)
32186 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32187 }
32188 }
32189 }
32190 }
32191
32192 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8) {
32193 TEST_REQUIRES_X86_AVX2;
32194 for (size_t k = 9; k < 16; k++) {
32195 GemmMicrokernelTester()
32196 .mr(3)
32197 .nr(8)
32198 .kr(8)
32199 .sr(1)
32200 .m(3)
32201 .n(8)
32202 .k(k)
32203 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32204 }
32205 }
32206
32207 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8_subtile) {
32208 TEST_REQUIRES_X86_AVX2;
32209 for (size_t k = 9; k < 16; k++) {
32210 for (uint32_t m = 1; m <= 3; m++) {
32211 for (uint32_t n = 1; n <= 8; n++) {
32212 GemmMicrokernelTester()
32213 .mr(3)
32214 .nr(8)
32215 .kr(8)
32216 .sr(1)
32217 .m(m)
32218 .n(n)
32219 .k(k)
32220 .iterations(1)
32221 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32222 }
32223 }
32224 }
32225 }
32226
32227 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8) {
32228 TEST_REQUIRES_X86_AVX2;
32229 for (size_t k = 16; k <= 80; k += 8) {
32230 GemmMicrokernelTester()
32231 .mr(3)
32232 .nr(8)
32233 .kr(8)
32234 .sr(1)
32235 .m(3)
32236 .n(8)
32237 .k(k)
32238 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32239 }
32240 }
32241
32242 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8_subtile) {
32243 TEST_REQUIRES_X86_AVX2;
32244 for (size_t k = 16; k <= 80; k += 8) {
32245 for (uint32_t m = 1; m <= 3; m++) {
32246 for (uint32_t n = 1; n <= 8; n++) {
32247 GemmMicrokernelTester()
32248 .mr(3)
32249 .nr(8)
32250 .kr(8)
32251 .sr(1)
32252 .m(m)
32253 .n(n)
32254 .k(k)
32255 .iterations(1)
32256 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32257 }
32258 }
32259 }
32260 }
32261
32262 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8) {
32263 TEST_REQUIRES_X86_AVX2;
32264 for (uint32_t n = 9; n < 16; n++) {
32265 for (size_t k = 1; k <= 40; k += 9) {
32266 GemmMicrokernelTester()
32267 .mr(3)
32268 .nr(8)
32269 .kr(8)
32270 .sr(1)
32271 .m(3)
32272 .n(8)
32273 .k(k)
32274 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32275 }
32276 }
32277 }
32278
32279 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_strided_cn) {
32280 TEST_REQUIRES_X86_AVX2;
32281 for (uint32_t n = 9; n < 16; n++) {
32282 for (size_t k = 1; k <= 40; k += 9) {
32283 GemmMicrokernelTester()
32284 .mr(3)
32285 .nr(8)
32286 .kr(8)
32287 .sr(1)
32288 .m(3)
32289 .n(8)
32290 .k(k)
32291 .cn_stride(11)
32292 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32293 }
32294 }
32295 }
32296
32297 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_subtile) {
32298 TEST_REQUIRES_X86_AVX2;
32299 for (uint32_t n = 9; n < 16; n++) {
32300 for (size_t k = 1; k <= 40; k += 9) {
32301 for (uint32_t m = 1; m <= 3; m++) {
32302 GemmMicrokernelTester()
32303 .mr(3)
32304 .nr(8)
32305 .kr(8)
32306 .sr(1)
32307 .m(m)
32308 .n(n)
32309 .k(k)
32310 .iterations(1)
32311 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32312 }
32313 }
32314 }
32315 }
32316
32317 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8) {
32318 TEST_REQUIRES_X86_AVX2;
32319 for (uint32_t n = 16; n <= 24; n += 8) {
32320 for (size_t k = 1; k <= 40; k += 9) {
32321 GemmMicrokernelTester()
32322 .mr(3)
32323 .nr(8)
32324 .kr(8)
32325 .sr(1)
32326 .m(3)
32327 .n(8)
32328 .k(k)
32329 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32330 }
32331 }
32332 }
32333
32334 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_strided_cn) {
32335 TEST_REQUIRES_X86_AVX2;
32336 for (uint32_t n = 16; n <= 24; n += 8) {
32337 for (size_t k = 1; k <= 40; k += 9) {
32338 GemmMicrokernelTester()
32339 .mr(3)
32340 .nr(8)
32341 .kr(8)
32342 .sr(1)
32343 .m(3)
32344 .n(n)
32345 .k(k)
32346 .cn_stride(11)
32347 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32348 }
32349 }
32350 }
32351
32352 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_subtile) {
32353 TEST_REQUIRES_X86_AVX2;
32354 for (uint32_t n = 16; n <= 24; n += 8) {
32355 for (size_t k = 1; k <= 40; k += 9) {
32356 for (uint32_t m = 1; m <= 3; m++) {
32357 GemmMicrokernelTester()
32358 .mr(3)
32359 .nr(8)
32360 .kr(8)
32361 .sr(1)
32362 .m(m)
32363 .n(n)
32364 .k(k)
32365 .iterations(1)
32366 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32367 }
32368 }
32369 }
32370 }
32371
32372 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, small_kernel) {
32373 TEST_REQUIRES_X86_AVX2;
32374 for (size_t k = 1; k <= 40; k += 9) {
32375 GemmMicrokernelTester()
32376 .mr(3)
32377 .nr(8)
32378 .kr(8)
32379 .sr(1)
32380 .m(3)
32381 .n(8)
32382 .k(k)
32383 .ks(3)
32384 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32385 }
32386 }
32387
32388 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, small_kernel_subtile) {
32389 TEST_REQUIRES_X86_AVX2;
32390 for (size_t k = 1; k <= 40; k += 9) {
32391 for (uint32_t m = 1; m <= 3; m++) {
32392 for (uint32_t n = 1; n <= 8; n++) {
32393 GemmMicrokernelTester()
32394 .mr(3)
32395 .nr(8)
32396 .kr(8)
32397 .sr(1)
32398 .m(m)
32399 .n(n)
32400 .k(k)
32401 .ks(3)
32402 .iterations(1)
32403 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32404 }
32405 }
32406 }
32407 }
32408
32409 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_small_kernel) {
32410 TEST_REQUIRES_X86_AVX2;
32411 for (uint32_t n = 9; n < 16; n++) {
32412 for (size_t k = 1; k <= 40; k += 9) {
32413 GemmMicrokernelTester()
32414 .mr(3)
32415 .nr(8)
32416 .kr(8)
32417 .sr(1)
32418 .m(3)
32419 .n(8)
32420 .k(k)
32421 .ks(3)
32422 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32423 }
32424 }
32425 }
32426
32427 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_small_kernel) {
32428 TEST_REQUIRES_X86_AVX2;
32429 for (uint32_t n = 16; n <= 24; n += 8) {
32430 for (size_t k = 1; k <= 40; k += 9) {
32431 GemmMicrokernelTester()
32432 .mr(3)
32433 .nr(8)
32434 .kr(8)
32435 .sr(1)
32436 .m(3)
32437 .n(8)
32438 .k(k)
32439 .ks(3)
32440 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32441 }
32442 }
32443 }
32444
32445 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, strided_cm_subtile) {
32446 TEST_REQUIRES_X86_AVX2;
32447 for (size_t k = 1; k <= 40; k += 9) {
32448 for (uint32_t m = 1; m <= 3; m++) {
32449 for (uint32_t n = 1; n <= 8; n++) {
32450 GemmMicrokernelTester()
32451 .mr(3)
32452 .nr(8)
32453 .kr(8)
32454 .sr(1)
32455 .m(m)
32456 .n(n)
32457 .k(k)
32458 .cm_stride(11)
32459 .iterations(1)
32460 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32461 }
32462 }
32463 }
32464 }
32465
32466 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, a_offset) {
32467 TEST_REQUIRES_X86_AVX2;
32468 for (size_t k = 1; k <= 40; k += 9) {
32469 GemmMicrokernelTester()
32470 .mr(3)
32471 .nr(8)
32472 .kr(8)
32473 .sr(1)
32474 .m(3)
32475 .n(8)
32476 .k(k)
32477 .ks(3)
32478 .a_offset(127)
32479 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32480 }
32481 }
32482
32483 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, zero) {
32484 TEST_REQUIRES_X86_AVX2;
32485 for (uint32_t mz = 0; mz < 3; mz++) {
32486 for (size_t k = 1; k <= 40; k += 9) {
32487 GemmMicrokernelTester()
32488 .mr(3)
32489 .nr(8)
32490 .kr(8)
32491 .sr(1)
32492 .m(3)
32493 .n(8)
32494 .k(k)
32495 .ks(3)
32496 .a_offset(127)
32497 .zero_index(mz)
32498 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32499 }
32500 }
32501 }
32502
32503 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, qmin) {
32504 TEST_REQUIRES_X86_AVX2;
32505 GemmMicrokernelTester()
32506 .mr(3)
32507 .nr(8)
32508 .kr(8)
32509 .sr(1)
32510 .m(3)
32511 .n(8)
32512 .k(8)
32513 .qmin(128)
32514 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32515 }
32516
32517 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, qmax) {
32518 TEST_REQUIRES_X86_AVX2;
32519 GemmMicrokernelTester()
32520 .mr(3)
32521 .nr(8)
32522 .kr(8)
32523 .sr(1)
32524 .m(3)
32525 .n(8)
32526 .k(8)
32527 .qmax(128)
32528 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32529 }
32530
32531 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, strided_cm) {
32532 TEST_REQUIRES_X86_AVX2;
32533 GemmMicrokernelTester()
32534 .mr(3)
32535 .nr(8)
32536 .kr(8)
32537 .sr(1)
32538 .m(3)
32539 .n(8)
32540 .k(8)
32541 .cm_stride(11)
32542 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32543 }
32544
32545 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, no_a_zero_point) {
32546 TEST_REQUIRES_X86_AVX2;
32547 for (size_t k = 1; k <= 40; k += 9) {
32548 GemmMicrokernelTester()
32549 .mr(3)
32550 .nr(8)
32551 .kr(8)
32552 .sr(1)
32553 .m(3)
32554 .n(8)
32555 .k(k)
32556 .a_zero_point(0)
32557 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32558 }
32559 }
32560
32561 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, no_b_zero_point) {
32562 TEST_REQUIRES_X86_AVX2;
32563 for (size_t k = 1; k <= 40; k += 9) {
32564 GemmMicrokernelTester()
32565 .mr(3)
32566 .nr(8)
32567 .kr(8)
32568 .sr(1)
32569 .m(3)
32570 .n(8)
32571 .k(k)
32572 .b_zero_point(0)
32573 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32574 }
32575 }
32576
32577 TEST(QU8_IGEMM_MINMAX_FP32_3X8C8__AVX2, no_zero_point) {
32578 TEST_REQUIRES_X86_AVX2;
32579 for (size_t k = 1; k <= 40; k += 9) {
32580 GemmMicrokernelTester()
32581 .mr(3)
32582 .nr(8)
32583 .kr(8)
32584 .sr(1)
32585 .m(3)
32586 .n(8)
32587 .k(k)
32588 .a_zero_point(0)
32589 .b_zero_point(0)
32590 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32591 }
32592 }
32593#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan3cf2e222021-07-08 11:38:45 -070032594
32595
32596#if XNN_ARCH_X86 || XNN_ARCH_X86_64
32597 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8) {
32598 TEST_REQUIRES_X86_AVX512SKX;
32599 GemmMicrokernelTester()
32600 .mr(1)
32601 .nr(16)
32602 .kr(8)
32603 .sr(1)
32604 .m(1)
32605 .n(16)
32606 .k(8)
32607 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32608 }
32609
32610 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, strided_cn) {
32611 TEST_REQUIRES_X86_AVX512SKX;
32612 GemmMicrokernelTester()
32613 .mr(1)
32614 .nr(16)
32615 .kr(8)
32616 .sr(1)
32617 .m(1)
32618 .n(16)
32619 .k(8)
32620 .cn_stride(19)
32621 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32622 }
32623
32624 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_subtile) {
32625 TEST_REQUIRES_X86_AVX512SKX;
32626 for (uint32_t m = 1; m <= 1; m++) {
32627 for (uint32_t n = 1; n <= 16; n++) {
32628 GemmMicrokernelTester()
32629 .mr(1)
32630 .nr(16)
32631 .kr(8)
32632 .sr(1)
32633 .m(m)
32634 .n(n)
32635 .k(8)
32636 .iterations(1)
32637 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32638 }
32639 }
32640 }
32641
32642 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_subtile_m) {
32643 TEST_REQUIRES_X86_AVX512SKX;
32644 for (uint32_t m = 1; m <= 1; m++) {
32645 GemmMicrokernelTester()
32646 .mr(1)
32647 .nr(16)
32648 .kr(8)
32649 .sr(1)
32650 .m(m)
32651 .n(16)
32652 .k(8)
32653 .iterations(1)
32654 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32655 }
32656 }
32657
32658 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_subtile_n) {
32659 TEST_REQUIRES_X86_AVX512SKX;
32660 for (uint32_t n = 1; n <= 16; n++) {
32661 GemmMicrokernelTester()
32662 .mr(1)
32663 .nr(16)
32664 .kr(8)
32665 .sr(1)
32666 .m(1)
32667 .n(n)
32668 .k(8)
32669 .iterations(1)
32670 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32671 }
32672 }
32673
32674 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_lt_8) {
32675 TEST_REQUIRES_X86_AVX512SKX;
32676 for (size_t k = 1; k < 8; k++) {
32677 GemmMicrokernelTester()
32678 .mr(1)
32679 .nr(16)
32680 .kr(8)
32681 .sr(1)
32682 .m(1)
32683 .n(16)
32684 .k(k)
32685 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32686 }
32687 }
32688
32689 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_lt_8_subtile) {
32690 TEST_REQUIRES_X86_AVX512SKX;
32691 for (size_t k = 1; k < 8; k++) {
32692 for (uint32_t m = 1; m <= 1; m++) {
32693 for (uint32_t n = 1; n <= 16; n++) {
32694 GemmMicrokernelTester()
32695 .mr(1)
32696 .nr(16)
32697 .kr(8)
32698 .sr(1)
32699 .m(m)
32700 .n(n)
32701 .k(k)
32702 .iterations(1)
32703 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32704 }
32705 }
32706 }
32707 }
32708
32709 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_gt_8) {
32710 TEST_REQUIRES_X86_AVX512SKX;
32711 for (size_t k = 9; k < 16; k++) {
32712 GemmMicrokernelTester()
32713 .mr(1)
32714 .nr(16)
32715 .kr(8)
32716 .sr(1)
32717 .m(1)
32718 .n(16)
32719 .k(k)
32720 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32721 }
32722 }
32723
32724 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_gt_8_subtile) {
32725 TEST_REQUIRES_X86_AVX512SKX;
32726 for (size_t k = 9; k < 16; k++) {
32727 for (uint32_t m = 1; m <= 1; m++) {
32728 for (uint32_t n = 1; n <= 16; n++) {
32729 GemmMicrokernelTester()
32730 .mr(1)
32731 .nr(16)
32732 .kr(8)
32733 .sr(1)
32734 .m(m)
32735 .n(n)
32736 .k(k)
32737 .iterations(1)
32738 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32739 }
32740 }
32741 }
32742 }
32743
32744 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_div_8) {
32745 TEST_REQUIRES_X86_AVX512SKX;
32746 for (size_t k = 16; k <= 80; k += 8) {
32747 GemmMicrokernelTester()
32748 .mr(1)
32749 .nr(16)
32750 .kr(8)
32751 .sr(1)
32752 .m(1)
32753 .n(16)
32754 .k(k)
32755 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32756 }
32757 }
32758
32759 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_div_8_subtile) {
32760 TEST_REQUIRES_X86_AVX512SKX;
32761 for (size_t k = 16; k <= 80; k += 8) {
32762 for (uint32_t m = 1; m <= 1; m++) {
32763 for (uint32_t n = 1; n <= 16; n++) {
32764 GemmMicrokernelTester()
32765 .mr(1)
32766 .nr(16)
32767 .kr(8)
32768 .sr(1)
32769 .m(m)
32770 .n(n)
32771 .k(k)
32772 .iterations(1)
32773 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32774 }
32775 }
32776 }
32777 }
32778
32779 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16) {
32780 TEST_REQUIRES_X86_AVX512SKX;
32781 for (uint32_t n = 17; n < 32; n++) {
32782 for (size_t k = 1; k <= 40; k += 9) {
32783 GemmMicrokernelTester()
32784 .mr(1)
32785 .nr(16)
32786 .kr(8)
32787 .sr(1)
32788 .m(1)
32789 .n(16)
32790 .k(k)
32791 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32792 }
32793 }
32794 }
32795
32796 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16_strided_cn) {
32797 TEST_REQUIRES_X86_AVX512SKX;
32798 for (uint32_t n = 17; n < 32; n++) {
32799 for (size_t k = 1; k <= 40; k += 9) {
32800 GemmMicrokernelTester()
32801 .mr(1)
32802 .nr(16)
32803 .kr(8)
32804 .sr(1)
32805 .m(1)
32806 .n(16)
32807 .k(k)
32808 .cn_stride(19)
32809 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32810 }
32811 }
32812 }
32813
32814 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16_subtile) {
32815 TEST_REQUIRES_X86_AVX512SKX;
32816 for (uint32_t n = 17; n < 32; n++) {
32817 for (size_t k = 1; k <= 40; k += 9) {
32818 for (uint32_t m = 1; m <= 1; m++) {
32819 GemmMicrokernelTester()
32820 .mr(1)
32821 .nr(16)
32822 .kr(8)
32823 .sr(1)
32824 .m(m)
32825 .n(n)
32826 .k(k)
32827 .iterations(1)
32828 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32829 }
32830 }
32831 }
32832 }
32833
32834 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16) {
32835 TEST_REQUIRES_X86_AVX512SKX;
32836 for (uint32_t n = 32; n <= 48; n += 16) {
32837 for (size_t k = 1; k <= 40; k += 9) {
32838 GemmMicrokernelTester()
32839 .mr(1)
32840 .nr(16)
32841 .kr(8)
32842 .sr(1)
32843 .m(1)
32844 .n(16)
32845 .k(k)
32846 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32847 }
32848 }
32849 }
32850
32851 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16_strided_cn) {
32852 TEST_REQUIRES_X86_AVX512SKX;
32853 for (uint32_t n = 32; n <= 48; n += 16) {
32854 for (size_t k = 1; k <= 40; k += 9) {
32855 GemmMicrokernelTester()
32856 .mr(1)
32857 .nr(16)
32858 .kr(8)
32859 .sr(1)
32860 .m(1)
32861 .n(n)
32862 .k(k)
32863 .cn_stride(19)
32864 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32865 }
32866 }
32867 }
32868
32869 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16_subtile) {
32870 TEST_REQUIRES_X86_AVX512SKX;
32871 for (uint32_t n = 32; n <= 48; n += 16) {
32872 for (size_t k = 1; k <= 40; k += 9) {
32873 for (uint32_t m = 1; m <= 1; m++) {
32874 GemmMicrokernelTester()
32875 .mr(1)
32876 .nr(16)
32877 .kr(8)
32878 .sr(1)
32879 .m(m)
32880 .n(n)
32881 .k(k)
32882 .iterations(1)
32883 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32884 }
32885 }
32886 }
32887 }
32888
32889 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, small_kernel) {
32890 TEST_REQUIRES_X86_AVX512SKX;
32891 for (size_t k = 1; k <= 40; k += 9) {
32892 GemmMicrokernelTester()
32893 .mr(1)
32894 .nr(16)
32895 .kr(8)
32896 .sr(1)
32897 .m(1)
32898 .n(16)
32899 .k(k)
32900 .ks(3)
32901 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32902 }
32903 }
32904
32905 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, small_kernel_subtile) {
32906 TEST_REQUIRES_X86_AVX512SKX;
32907 for (size_t k = 1; k <= 40; k += 9) {
32908 for (uint32_t m = 1; m <= 1; m++) {
32909 for (uint32_t n = 1; n <= 16; n++) {
32910 GemmMicrokernelTester()
32911 .mr(1)
32912 .nr(16)
32913 .kr(8)
32914 .sr(1)
32915 .m(m)
32916 .n(n)
32917 .k(k)
32918 .ks(3)
32919 .iterations(1)
32920 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32921 }
32922 }
32923 }
32924 }
32925
32926 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16_small_kernel) {
32927 TEST_REQUIRES_X86_AVX512SKX;
32928 for (uint32_t n = 17; n < 32; n++) {
32929 for (size_t k = 1; k <= 40; k += 9) {
32930 GemmMicrokernelTester()
32931 .mr(1)
32932 .nr(16)
32933 .kr(8)
32934 .sr(1)
32935 .m(1)
32936 .n(16)
32937 .k(k)
32938 .ks(3)
32939 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32940 }
32941 }
32942 }
32943
32944 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16_small_kernel) {
32945 TEST_REQUIRES_X86_AVX512SKX;
32946 for (uint32_t n = 32; n <= 48; n += 16) {
32947 for (size_t k = 1; k <= 40; k += 9) {
32948 GemmMicrokernelTester()
32949 .mr(1)
32950 .nr(16)
32951 .kr(8)
32952 .sr(1)
32953 .m(1)
32954 .n(16)
32955 .k(k)
32956 .ks(3)
32957 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32958 }
32959 }
32960 }
32961
32962 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, strided_cm_subtile) {
32963 TEST_REQUIRES_X86_AVX512SKX;
32964 for (size_t k = 1; k <= 40; k += 9) {
32965 for (uint32_t m = 1; m <= 1; m++) {
32966 for (uint32_t n = 1; n <= 16; n++) {
32967 GemmMicrokernelTester()
32968 .mr(1)
32969 .nr(16)
32970 .kr(8)
32971 .sr(1)
32972 .m(m)
32973 .n(n)
32974 .k(k)
32975 .cm_stride(19)
32976 .iterations(1)
32977 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32978 }
32979 }
32980 }
32981 }
32982
32983 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, a_offset) {
32984 TEST_REQUIRES_X86_AVX512SKX;
32985 for (size_t k = 1; k <= 40; k += 9) {
32986 GemmMicrokernelTester()
32987 .mr(1)
32988 .nr(16)
32989 .kr(8)
32990 .sr(1)
32991 .m(1)
32992 .n(16)
32993 .k(k)
32994 .ks(3)
32995 .a_offset(43)
32996 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
32997 }
32998 }
32999
33000 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, zero) {
33001 TEST_REQUIRES_X86_AVX512SKX;
33002 for (uint32_t mz = 0; mz < 1; mz++) {
33003 for (size_t k = 1; k <= 40; k += 9) {
33004 GemmMicrokernelTester()
33005 .mr(1)
33006 .nr(16)
33007 .kr(8)
33008 .sr(1)
33009 .m(1)
33010 .n(16)
33011 .k(k)
33012 .ks(3)
33013 .a_offset(43)
33014 .zero_index(mz)
33015 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33016 }
33017 }
33018 }
33019
33020 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, qmin) {
33021 TEST_REQUIRES_X86_AVX512SKX;
33022 GemmMicrokernelTester()
33023 .mr(1)
33024 .nr(16)
33025 .kr(8)
33026 .sr(1)
33027 .m(1)
33028 .n(16)
33029 .k(8)
33030 .qmin(128)
33031 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33032 }
33033
33034 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, qmax) {
33035 TEST_REQUIRES_X86_AVX512SKX;
33036 GemmMicrokernelTester()
33037 .mr(1)
33038 .nr(16)
33039 .kr(8)
33040 .sr(1)
33041 .m(1)
33042 .n(16)
33043 .k(8)
33044 .qmax(128)
33045 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33046 }
33047
33048 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, strided_cm) {
33049 TEST_REQUIRES_X86_AVX512SKX;
33050 GemmMicrokernelTester()
33051 .mr(1)
33052 .nr(16)
33053 .kr(8)
33054 .sr(1)
33055 .m(1)
33056 .n(16)
33057 .k(8)
33058 .cm_stride(19)
33059 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33060 }
33061
33062 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, no_a_zero_point) {
33063 TEST_REQUIRES_X86_AVX512SKX;
33064 for (size_t k = 1; k <= 40; k += 9) {
33065 GemmMicrokernelTester()
33066 .mr(1)
33067 .nr(16)
33068 .kr(8)
33069 .sr(1)
33070 .m(1)
33071 .n(16)
33072 .k(k)
33073 .a_zero_point(0)
33074 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33075 }
33076 }
33077
33078 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, no_b_zero_point) {
33079 TEST_REQUIRES_X86_AVX512SKX;
33080 for (size_t k = 1; k <= 40; k += 9) {
33081 GemmMicrokernelTester()
33082 .mr(1)
33083 .nr(16)
33084 .kr(8)
33085 .sr(1)
33086 .m(1)
33087 .n(16)
33088 .k(k)
33089 .b_zero_point(0)
33090 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33091 }
33092 }
33093
33094 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, no_zero_point) {
33095 TEST_REQUIRES_X86_AVX512SKX;
33096 for (size_t k = 1; k <= 40; k += 9) {
33097 GemmMicrokernelTester()
33098 .mr(1)
33099 .nr(16)
33100 .kr(8)
33101 .sr(1)
33102 .m(1)
33103 .n(16)
33104 .k(k)
33105 .a_zero_point(0)
33106 .b_zero_point(0)
33107 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33108 }
33109 }
33110#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
33111
33112
33113#if XNN_ARCH_X86 || XNN_ARCH_X86_64
33114 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8) {
33115 TEST_REQUIRES_X86_AVX512SKX;
33116 GemmMicrokernelTester()
33117 .mr(2)
33118 .nr(16)
33119 .kr(8)
33120 .sr(1)
33121 .m(2)
33122 .n(16)
33123 .k(8)
33124 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33125 }
33126
33127 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cn) {
33128 TEST_REQUIRES_X86_AVX512SKX;
33129 GemmMicrokernelTester()
33130 .mr(2)
33131 .nr(16)
33132 .kr(8)
33133 .sr(1)
33134 .m(2)
33135 .n(16)
33136 .k(8)
33137 .cn_stride(19)
33138 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33139 }
33140
33141 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile) {
33142 TEST_REQUIRES_X86_AVX512SKX;
33143 for (uint32_t m = 1; m <= 2; m++) {
33144 for (uint32_t n = 1; n <= 16; n++) {
33145 GemmMicrokernelTester()
33146 .mr(2)
33147 .nr(16)
33148 .kr(8)
33149 .sr(1)
33150 .m(m)
33151 .n(n)
33152 .k(8)
33153 .iterations(1)
33154 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33155 }
33156 }
33157 }
33158
33159 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile_m) {
33160 TEST_REQUIRES_X86_AVX512SKX;
33161 for (uint32_t m = 1; m <= 2; m++) {
33162 GemmMicrokernelTester()
33163 .mr(2)
33164 .nr(16)
33165 .kr(8)
33166 .sr(1)
33167 .m(m)
33168 .n(16)
33169 .k(8)
33170 .iterations(1)
33171 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33172 }
33173 }
33174
33175 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile_n) {
33176 TEST_REQUIRES_X86_AVX512SKX;
33177 for (uint32_t n = 1; n <= 16; n++) {
33178 GemmMicrokernelTester()
33179 .mr(2)
33180 .nr(16)
33181 .kr(8)
33182 .sr(1)
33183 .m(2)
33184 .n(n)
33185 .k(8)
33186 .iterations(1)
33187 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33188 }
33189 }
33190
33191 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_lt_8) {
33192 TEST_REQUIRES_X86_AVX512SKX;
33193 for (size_t k = 1; k < 8; k++) {
33194 GemmMicrokernelTester()
33195 .mr(2)
33196 .nr(16)
33197 .kr(8)
33198 .sr(1)
33199 .m(2)
33200 .n(16)
33201 .k(k)
33202 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33203 }
33204 }
33205
33206 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_lt_8_subtile) {
33207 TEST_REQUIRES_X86_AVX512SKX;
33208 for (size_t k = 1; k < 8; k++) {
33209 for (uint32_t m = 1; m <= 2; m++) {
33210 for (uint32_t n = 1; n <= 16; n++) {
33211 GemmMicrokernelTester()
33212 .mr(2)
33213 .nr(16)
33214 .kr(8)
33215 .sr(1)
33216 .m(m)
33217 .n(n)
33218 .k(k)
33219 .iterations(1)
33220 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33221 }
33222 }
33223 }
33224 }
33225
33226 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_gt_8) {
33227 TEST_REQUIRES_X86_AVX512SKX;
33228 for (size_t k = 9; k < 16; k++) {
33229 GemmMicrokernelTester()
33230 .mr(2)
33231 .nr(16)
33232 .kr(8)
33233 .sr(1)
33234 .m(2)
33235 .n(16)
33236 .k(k)
33237 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33238 }
33239 }
33240
33241 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_gt_8_subtile) {
33242 TEST_REQUIRES_X86_AVX512SKX;
33243 for (size_t k = 9; k < 16; k++) {
33244 for (uint32_t m = 1; m <= 2; m++) {
33245 for (uint32_t n = 1; n <= 16; n++) {
33246 GemmMicrokernelTester()
33247 .mr(2)
33248 .nr(16)
33249 .kr(8)
33250 .sr(1)
33251 .m(m)
33252 .n(n)
33253 .k(k)
33254 .iterations(1)
33255 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33256 }
33257 }
33258 }
33259 }
33260
33261 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_div_8) {
33262 TEST_REQUIRES_X86_AVX512SKX;
33263 for (size_t k = 16; k <= 80; k += 8) {
33264 GemmMicrokernelTester()
33265 .mr(2)
33266 .nr(16)
33267 .kr(8)
33268 .sr(1)
33269 .m(2)
33270 .n(16)
33271 .k(k)
33272 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33273 }
33274 }
33275
33276 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_div_8_subtile) {
33277 TEST_REQUIRES_X86_AVX512SKX;
33278 for (size_t k = 16; k <= 80; k += 8) {
33279 for (uint32_t m = 1; m <= 2; m++) {
33280 for (uint32_t n = 1; n <= 16; n++) {
33281 GemmMicrokernelTester()
33282 .mr(2)
33283 .nr(16)
33284 .kr(8)
33285 .sr(1)
33286 .m(m)
33287 .n(n)
33288 .k(k)
33289 .iterations(1)
33290 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33291 }
33292 }
33293 }
33294 }
33295
33296 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16) {
33297 TEST_REQUIRES_X86_AVX512SKX;
33298 for (uint32_t n = 17; n < 32; n++) {
33299 for (size_t k = 1; k <= 40; k += 9) {
33300 GemmMicrokernelTester()
33301 .mr(2)
33302 .nr(16)
33303 .kr(8)
33304 .sr(1)
33305 .m(2)
33306 .n(16)
33307 .k(k)
33308 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33309 }
33310 }
33311 }
33312
33313 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_strided_cn) {
33314 TEST_REQUIRES_X86_AVX512SKX;
33315 for (uint32_t n = 17; n < 32; n++) {
33316 for (size_t k = 1; k <= 40; k += 9) {
33317 GemmMicrokernelTester()
33318 .mr(2)
33319 .nr(16)
33320 .kr(8)
33321 .sr(1)
33322 .m(2)
33323 .n(16)
33324 .k(k)
33325 .cn_stride(19)
33326 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33327 }
33328 }
33329 }
33330
33331 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_subtile) {
33332 TEST_REQUIRES_X86_AVX512SKX;
33333 for (uint32_t n = 17; n < 32; n++) {
33334 for (size_t k = 1; k <= 40; k += 9) {
33335 for (uint32_t m = 1; m <= 2; m++) {
33336 GemmMicrokernelTester()
33337 .mr(2)
33338 .nr(16)
33339 .kr(8)
33340 .sr(1)
33341 .m(m)
33342 .n(n)
33343 .k(k)
33344 .iterations(1)
33345 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33346 }
33347 }
33348 }
33349 }
33350
33351 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16) {
33352 TEST_REQUIRES_X86_AVX512SKX;
33353 for (uint32_t n = 32; n <= 48; n += 16) {
33354 for (size_t k = 1; k <= 40; k += 9) {
33355 GemmMicrokernelTester()
33356 .mr(2)
33357 .nr(16)
33358 .kr(8)
33359 .sr(1)
33360 .m(2)
33361 .n(16)
33362 .k(k)
33363 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33364 }
33365 }
33366 }
33367
33368 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_strided_cn) {
33369 TEST_REQUIRES_X86_AVX512SKX;
33370 for (uint32_t n = 32; n <= 48; n += 16) {
33371 for (size_t k = 1; k <= 40; k += 9) {
33372 GemmMicrokernelTester()
33373 .mr(2)
33374 .nr(16)
33375 .kr(8)
33376 .sr(1)
33377 .m(2)
33378 .n(n)
33379 .k(k)
33380 .cn_stride(19)
33381 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33382 }
33383 }
33384 }
33385
33386 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_subtile) {
33387 TEST_REQUIRES_X86_AVX512SKX;
33388 for (uint32_t n = 32; n <= 48; n += 16) {
33389 for (size_t k = 1; k <= 40; k += 9) {
33390 for (uint32_t m = 1; m <= 2; m++) {
33391 GemmMicrokernelTester()
33392 .mr(2)
33393 .nr(16)
33394 .kr(8)
33395 .sr(1)
33396 .m(m)
33397 .n(n)
33398 .k(k)
33399 .iterations(1)
33400 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33401 }
33402 }
33403 }
33404 }
33405
33406 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, small_kernel) {
33407 TEST_REQUIRES_X86_AVX512SKX;
33408 for (size_t k = 1; k <= 40; k += 9) {
33409 GemmMicrokernelTester()
33410 .mr(2)
33411 .nr(16)
33412 .kr(8)
33413 .sr(1)
33414 .m(2)
33415 .n(16)
33416 .k(k)
33417 .ks(3)
33418 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33419 }
33420 }
33421
33422 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, small_kernel_subtile) {
33423 TEST_REQUIRES_X86_AVX512SKX;
33424 for (size_t k = 1; k <= 40; k += 9) {
33425 for (uint32_t m = 1; m <= 2; m++) {
33426 for (uint32_t n = 1; n <= 16; n++) {
33427 GemmMicrokernelTester()
33428 .mr(2)
33429 .nr(16)
33430 .kr(8)
33431 .sr(1)
33432 .m(m)
33433 .n(n)
33434 .k(k)
33435 .ks(3)
33436 .iterations(1)
33437 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33438 }
33439 }
33440 }
33441 }
33442
33443 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_small_kernel) {
33444 TEST_REQUIRES_X86_AVX512SKX;
33445 for (uint32_t n = 17; n < 32; n++) {
33446 for (size_t k = 1; k <= 40; k += 9) {
33447 GemmMicrokernelTester()
33448 .mr(2)
33449 .nr(16)
33450 .kr(8)
33451 .sr(1)
33452 .m(2)
33453 .n(16)
33454 .k(k)
33455 .ks(3)
33456 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33457 }
33458 }
33459 }
33460
33461 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_small_kernel) {
33462 TEST_REQUIRES_X86_AVX512SKX;
33463 for (uint32_t n = 32; n <= 48; n += 16) {
33464 for (size_t k = 1; k <= 40; k += 9) {
33465 GemmMicrokernelTester()
33466 .mr(2)
33467 .nr(16)
33468 .kr(8)
33469 .sr(1)
33470 .m(2)
33471 .n(16)
33472 .k(k)
33473 .ks(3)
33474 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33475 }
33476 }
33477 }
33478
33479 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cm_subtile) {
33480 TEST_REQUIRES_X86_AVX512SKX;
33481 for (size_t k = 1; k <= 40; k += 9) {
33482 for (uint32_t m = 1; m <= 2; m++) {
33483 for (uint32_t n = 1; n <= 16; n++) {
33484 GemmMicrokernelTester()
33485 .mr(2)
33486 .nr(16)
33487 .kr(8)
33488 .sr(1)
33489 .m(m)
33490 .n(n)
33491 .k(k)
33492 .cm_stride(19)
33493 .iterations(1)
33494 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33495 }
33496 }
33497 }
33498 }
33499
33500 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, a_offset) {
33501 TEST_REQUIRES_X86_AVX512SKX;
33502 for (size_t k = 1; k <= 40; k += 9) {
33503 GemmMicrokernelTester()
33504 .mr(2)
33505 .nr(16)
33506 .kr(8)
33507 .sr(1)
33508 .m(2)
33509 .n(16)
33510 .k(k)
33511 .ks(3)
33512 .a_offset(83)
33513 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33514 }
33515 }
33516
33517 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, zero) {
33518 TEST_REQUIRES_X86_AVX512SKX;
33519 for (uint32_t mz = 0; mz < 2; mz++) {
33520 for (size_t k = 1; k <= 40; k += 9) {
33521 GemmMicrokernelTester()
33522 .mr(2)
33523 .nr(16)
33524 .kr(8)
33525 .sr(1)
33526 .m(2)
33527 .n(16)
33528 .k(k)
33529 .ks(3)
33530 .a_offset(83)
33531 .zero_index(mz)
33532 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33533 }
33534 }
33535 }
33536
33537 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, qmin) {
33538 TEST_REQUIRES_X86_AVX512SKX;
33539 GemmMicrokernelTester()
33540 .mr(2)
33541 .nr(16)
33542 .kr(8)
33543 .sr(1)
33544 .m(2)
33545 .n(16)
33546 .k(8)
33547 .qmin(128)
33548 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33549 }
33550
33551 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, qmax) {
33552 TEST_REQUIRES_X86_AVX512SKX;
33553 GemmMicrokernelTester()
33554 .mr(2)
33555 .nr(16)
33556 .kr(8)
33557 .sr(1)
33558 .m(2)
33559 .n(16)
33560 .k(8)
33561 .qmax(128)
33562 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33563 }
33564
33565 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cm) {
33566 TEST_REQUIRES_X86_AVX512SKX;
33567 GemmMicrokernelTester()
33568 .mr(2)
33569 .nr(16)
33570 .kr(8)
33571 .sr(1)
33572 .m(2)
33573 .n(16)
33574 .k(8)
33575 .cm_stride(19)
33576 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33577 }
33578
33579 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, no_a_zero_point) {
33580 TEST_REQUIRES_X86_AVX512SKX;
33581 for (size_t k = 1; k <= 40; k += 9) {
33582 GemmMicrokernelTester()
33583 .mr(2)
33584 .nr(16)
33585 .kr(8)
33586 .sr(1)
33587 .m(2)
33588 .n(16)
33589 .k(k)
33590 .a_zero_point(0)
33591 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33592 }
33593 }
33594
33595 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, no_b_zero_point) {
33596 TEST_REQUIRES_X86_AVX512SKX;
33597 for (size_t k = 1; k <= 40; k += 9) {
33598 GemmMicrokernelTester()
33599 .mr(2)
33600 .nr(16)
33601 .kr(8)
33602 .sr(1)
33603 .m(2)
33604 .n(16)
33605 .k(k)
33606 .b_zero_point(0)
33607 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33608 }
33609 }
33610
33611 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, no_zero_point) {
33612 TEST_REQUIRES_X86_AVX512SKX;
33613 for (size_t k = 1; k <= 40; k += 9) {
33614 GemmMicrokernelTester()
33615 .mr(2)
33616 .nr(16)
33617 .kr(8)
33618 .sr(1)
33619 .m(2)
33620 .n(16)
33621 .k(k)
33622 .a_zero_point(0)
33623 .b_zero_point(0)
33624 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33625 }
33626 }
33627#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
33628
33629
33630#if XNN_ARCH_X86 || XNN_ARCH_X86_64
33631 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8) {
33632 TEST_REQUIRES_X86_AVX512SKX;
33633 GemmMicrokernelTester()
33634 .mr(3)
33635 .nr(16)
33636 .kr(8)
33637 .sr(1)
33638 .m(3)
33639 .n(16)
33640 .k(8)
33641 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33642 }
33643
33644 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cn) {
33645 TEST_REQUIRES_X86_AVX512SKX;
33646 GemmMicrokernelTester()
33647 .mr(3)
33648 .nr(16)
33649 .kr(8)
33650 .sr(1)
33651 .m(3)
33652 .n(16)
33653 .k(8)
33654 .cn_stride(19)
33655 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33656 }
33657
33658 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile) {
33659 TEST_REQUIRES_X86_AVX512SKX;
33660 for (uint32_t m = 1; m <= 3; m++) {
33661 for (uint32_t n = 1; n <= 16; n++) {
33662 GemmMicrokernelTester()
33663 .mr(3)
33664 .nr(16)
33665 .kr(8)
33666 .sr(1)
33667 .m(m)
33668 .n(n)
33669 .k(8)
33670 .iterations(1)
33671 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33672 }
33673 }
33674 }
33675
33676 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile_m) {
33677 TEST_REQUIRES_X86_AVX512SKX;
33678 for (uint32_t m = 1; m <= 3; m++) {
33679 GemmMicrokernelTester()
33680 .mr(3)
33681 .nr(16)
33682 .kr(8)
33683 .sr(1)
33684 .m(m)
33685 .n(16)
33686 .k(8)
33687 .iterations(1)
33688 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33689 }
33690 }
33691
33692 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile_n) {
33693 TEST_REQUIRES_X86_AVX512SKX;
33694 for (uint32_t n = 1; n <= 16; n++) {
33695 GemmMicrokernelTester()
33696 .mr(3)
33697 .nr(16)
33698 .kr(8)
33699 .sr(1)
33700 .m(3)
33701 .n(n)
33702 .k(8)
33703 .iterations(1)
33704 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33705 }
33706 }
33707
33708 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_lt_8) {
33709 TEST_REQUIRES_X86_AVX512SKX;
33710 for (size_t k = 1; k < 8; k++) {
33711 GemmMicrokernelTester()
33712 .mr(3)
33713 .nr(16)
33714 .kr(8)
33715 .sr(1)
33716 .m(3)
33717 .n(16)
33718 .k(k)
33719 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33720 }
33721 }
33722
33723 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_lt_8_subtile) {
33724 TEST_REQUIRES_X86_AVX512SKX;
33725 for (size_t k = 1; k < 8; k++) {
33726 for (uint32_t m = 1; m <= 3; m++) {
33727 for (uint32_t n = 1; n <= 16; n++) {
33728 GemmMicrokernelTester()
33729 .mr(3)
33730 .nr(16)
33731 .kr(8)
33732 .sr(1)
33733 .m(m)
33734 .n(n)
33735 .k(k)
33736 .iterations(1)
33737 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33738 }
33739 }
33740 }
33741 }
33742
33743 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_gt_8) {
33744 TEST_REQUIRES_X86_AVX512SKX;
33745 for (size_t k = 9; k < 16; k++) {
33746 GemmMicrokernelTester()
33747 .mr(3)
33748 .nr(16)
33749 .kr(8)
33750 .sr(1)
33751 .m(3)
33752 .n(16)
33753 .k(k)
33754 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33755 }
33756 }
33757
33758 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_gt_8_subtile) {
33759 TEST_REQUIRES_X86_AVX512SKX;
33760 for (size_t k = 9; k < 16; k++) {
33761 for (uint32_t m = 1; m <= 3; m++) {
33762 for (uint32_t n = 1; n <= 16; n++) {
33763 GemmMicrokernelTester()
33764 .mr(3)
33765 .nr(16)
33766 .kr(8)
33767 .sr(1)
33768 .m(m)
33769 .n(n)
33770 .k(k)
33771 .iterations(1)
33772 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33773 }
33774 }
33775 }
33776 }
33777
33778 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_div_8) {
33779 TEST_REQUIRES_X86_AVX512SKX;
33780 for (size_t k = 16; k <= 80; k += 8) {
33781 GemmMicrokernelTester()
33782 .mr(3)
33783 .nr(16)
33784 .kr(8)
33785 .sr(1)
33786 .m(3)
33787 .n(16)
33788 .k(k)
33789 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33790 }
33791 }
33792
33793 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_div_8_subtile) {
33794 TEST_REQUIRES_X86_AVX512SKX;
33795 for (size_t k = 16; k <= 80; k += 8) {
33796 for (uint32_t m = 1; m <= 3; m++) {
33797 for (uint32_t n = 1; n <= 16; n++) {
33798 GemmMicrokernelTester()
33799 .mr(3)
33800 .nr(16)
33801 .kr(8)
33802 .sr(1)
33803 .m(m)
33804 .n(n)
33805 .k(k)
33806 .iterations(1)
33807 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33808 }
33809 }
33810 }
33811 }
33812
33813 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16) {
33814 TEST_REQUIRES_X86_AVX512SKX;
33815 for (uint32_t n = 17; n < 32; n++) {
33816 for (size_t k = 1; k <= 40; k += 9) {
33817 GemmMicrokernelTester()
33818 .mr(3)
33819 .nr(16)
33820 .kr(8)
33821 .sr(1)
33822 .m(3)
33823 .n(16)
33824 .k(k)
33825 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33826 }
33827 }
33828 }
33829
33830 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_strided_cn) {
33831 TEST_REQUIRES_X86_AVX512SKX;
33832 for (uint32_t n = 17; n < 32; n++) {
33833 for (size_t k = 1; k <= 40; k += 9) {
33834 GemmMicrokernelTester()
33835 .mr(3)
33836 .nr(16)
33837 .kr(8)
33838 .sr(1)
33839 .m(3)
33840 .n(16)
33841 .k(k)
33842 .cn_stride(19)
33843 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33844 }
33845 }
33846 }
33847
33848 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_subtile) {
33849 TEST_REQUIRES_X86_AVX512SKX;
33850 for (uint32_t n = 17; n < 32; n++) {
33851 for (size_t k = 1; k <= 40; k += 9) {
33852 for (uint32_t m = 1; m <= 3; m++) {
33853 GemmMicrokernelTester()
33854 .mr(3)
33855 .nr(16)
33856 .kr(8)
33857 .sr(1)
33858 .m(m)
33859 .n(n)
33860 .k(k)
33861 .iterations(1)
33862 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33863 }
33864 }
33865 }
33866 }
33867
33868 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16) {
33869 TEST_REQUIRES_X86_AVX512SKX;
33870 for (uint32_t n = 32; n <= 48; n += 16) {
33871 for (size_t k = 1; k <= 40; k += 9) {
33872 GemmMicrokernelTester()
33873 .mr(3)
33874 .nr(16)
33875 .kr(8)
33876 .sr(1)
33877 .m(3)
33878 .n(16)
33879 .k(k)
33880 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33881 }
33882 }
33883 }
33884
33885 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_strided_cn) {
33886 TEST_REQUIRES_X86_AVX512SKX;
33887 for (uint32_t n = 32; n <= 48; n += 16) {
33888 for (size_t k = 1; k <= 40; k += 9) {
33889 GemmMicrokernelTester()
33890 .mr(3)
33891 .nr(16)
33892 .kr(8)
33893 .sr(1)
33894 .m(3)
33895 .n(n)
33896 .k(k)
33897 .cn_stride(19)
33898 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33899 }
33900 }
33901 }
33902
33903 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_subtile) {
33904 TEST_REQUIRES_X86_AVX512SKX;
33905 for (uint32_t n = 32; n <= 48; n += 16) {
33906 for (size_t k = 1; k <= 40; k += 9) {
33907 for (uint32_t m = 1; m <= 3; m++) {
33908 GemmMicrokernelTester()
33909 .mr(3)
33910 .nr(16)
33911 .kr(8)
33912 .sr(1)
33913 .m(m)
33914 .n(n)
33915 .k(k)
33916 .iterations(1)
33917 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33918 }
33919 }
33920 }
33921 }
33922
33923 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, small_kernel) {
33924 TEST_REQUIRES_X86_AVX512SKX;
33925 for (size_t k = 1; k <= 40; k += 9) {
33926 GemmMicrokernelTester()
33927 .mr(3)
33928 .nr(16)
33929 .kr(8)
33930 .sr(1)
33931 .m(3)
33932 .n(16)
33933 .k(k)
33934 .ks(3)
33935 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33936 }
33937 }
33938
33939 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, small_kernel_subtile) {
33940 TEST_REQUIRES_X86_AVX512SKX;
33941 for (size_t k = 1; k <= 40; k += 9) {
33942 for (uint32_t m = 1; m <= 3; m++) {
33943 for (uint32_t n = 1; n <= 16; n++) {
33944 GemmMicrokernelTester()
33945 .mr(3)
33946 .nr(16)
33947 .kr(8)
33948 .sr(1)
33949 .m(m)
33950 .n(n)
33951 .k(k)
33952 .ks(3)
33953 .iterations(1)
33954 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33955 }
33956 }
33957 }
33958 }
33959
33960 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_small_kernel) {
33961 TEST_REQUIRES_X86_AVX512SKX;
33962 for (uint32_t n = 17; n < 32; n++) {
33963 for (size_t k = 1; k <= 40; k += 9) {
33964 GemmMicrokernelTester()
33965 .mr(3)
33966 .nr(16)
33967 .kr(8)
33968 .sr(1)
33969 .m(3)
33970 .n(16)
33971 .k(k)
33972 .ks(3)
33973 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33974 }
33975 }
33976 }
33977
33978 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_small_kernel) {
33979 TEST_REQUIRES_X86_AVX512SKX;
33980 for (uint32_t n = 32; n <= 48; n += 16) {
33981 for (size_t k = 1; k <= 40; k += 9) {
33982 GemmMicrokernelTester()
33983 .mr(3)
33984 .nr(16)
33985 .kr(8)
33986 .sr(1)
33987 .m(3)
33988 .n(16)
33989 .k(k)
33990 .ks(3)
33991 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
33992 }
33993 }
33994 }
33995
33996 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cm_subtile) {
33997 TEST_REQUIRES_X86_AVX512SKX;
33998 for (size_t k = 1; k <= 40; k += 9) {
33999 for (uint32_t m = 1; m <= 3; m++) {
34000 for (uint32_t n = 1; n <= 16; n++) {
34001 GemmMicrokernelTester()
34002 .mr(3)
34003 .nr(16)
34004 .kr(8)
34005 .sr(1)
34006 .m(m)
34007 .n(n)
34008 .k(k)
34009 .cm_stride(19)
34010 .iterations(1)
34011 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34012 }
34013 }
34014 }
34015 }
34016
34017 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, a_offset) {
34018 TEST_REQUIRES_X86_AVX512SKX;
34019 for (size_t k = 1; k <= 40; k += 9) {
34020 GemmMicrokernelTester()
34021 .mr(3)
34022 .nr(16)
34023 .kr(8)
34024 .sr(1)
34025 .m(3)
34026 .n(16)
34027 .k(k)
34028 .ks(3)
34029 .a_offset(127)
34030 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34031 }
34032 }
34033
34034 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, zero) {
34035 TEST_REQUIRES_X86_AVX512SKX;
34036 for (uint32_t mz = 0; mz < 3; mz++) {
34037 for (size_t k = 1; k <= 40; k += 9) {
34038 GemmMicrokernelTester()
34039 .mr(3)
34040 .nr(16)
34041 .kr(8)
34042 .sr(1)
34043 .m(3)
34044 .n(16)
34045 .k(k)
34046 .ks(3)
34047 .a_offset(127)
34048 .zero_index(mz)
34049 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34050 }
34051 }
34052 }
34053
34054 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, qmin) {
34055 TEST_REQUIRES_X86_AVX512SKX;
34056 GemmMicrokernelTester()
34057 .mr(3)
34058 .nr(16)
34059 .kr(8)
34060 .sr(1)
34061 .m(3)
34062 .n(16)
34063 .k(8)
34064 .qmin(128)
34065 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34066 }
34067
34068 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, qmax) {
34069 TEST_REQUIRES_X86_AVX512SKX;
34070 GemmMicrokernelTester()
34071 .mr(3)
34072 .nr(16)
34073 .kr(8)
34074 .sr(1)
34075 .m(3)
34076 .n(16)
34077 .k(8)
34078 .qmax(128)
34079 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34080 }
34081
34082 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cm) {
34083 TEST_REQUIRES_X86_AVX512SKX;
34084 GemmMicrokernelTester()
34085 .mr(3)
34086 .nr(16)
34087 .kr(8)
34088 .sr(1)
34089 .m(3)
34090 .n(16)
34091 .k(8)
34092 .cm_stride(19)
34093 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34094 }
34095
34096 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, no_a_zero_point) {
34097 TEST_REQUIRES_X86_AVX512SKX;
34098 for (size_t k = 1; k <= 40; k += 9) {
34099 GemmMicrokernelTester()
34100 .mr(3)
34101 .nr(16)
34102 .kr(8)
34103 .sr(1)
34104 .m(3)
34105 .n(16)
34106 .k(k)
34107 .a_zero_point(0)
34108 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34109 }
34110 }
34111
34112 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, no_b_zero_point) {
34113 TEST_REQUIRES_X86_AVX512SKX;
34114 for (size_t k = 1; k <= 40; k += 9) {
34115 GemmMicrokernelTester()
34116 .mr(3)
34117 .nr(16)
34118 .kr(8)
34119 .sr(1)
34120 .m(3)
34121 .n(16)
34122 .k(k)
34123 .b_zero_point(0)
34124 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34125 }
34126 }
34127
34128 TEST(QU8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, no_zero_point) {
34129 TEST_REQUIRES_X86_AVX512SKX;
34130 for (size_t k = 1; k <= 40; k += 9) {
34131 GemmMicrokernelTester()
34132 .mr(3)
34133 .nr(16)
34134 .kr(8)
34135 .sr(1)
34136 .m(3)
34137 .n(16)
34138 .k(k)
34139 .a_zero_point(0)
34140 .b_zero_point(0)
34141 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34142 }
34143 }
34144#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
34145
34146
34147#if XNN_ARCH_X86 || XNN_ARCH_X86_64
34148 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8) {
34149 TEST_REQUIRES_X86_AVX512SKX;
34150 GemmMicrokernelTester()
34151 .mr(4)
34152 .nr(16)
34153 .kr(8)
34154 .sr(1)
34155 .m(4)
34156 .n(16)
34157 .k(8)
34158 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34159 }
34160
34161 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, strided_cn) {
34162 TEST_REQUIRES_X86_AVX512SKX;
34163 GemmMicrokernelTester()
34164 .mr(4)
34165 .nr(16)
34166 .kr(8)
34167 .sr(1)
34168 .m(4)
34169 .n(16)
34170 .k(8)
34171 .cn_stride(19)
34172 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34173 }
34174
34175 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_subtile) {
34176 TEST_REQUIRES_X86_AVX512SKX;
34177 for (uint32_t m = 1; m <= 4; m++) {
34178 for (uint32_t n = 1; n <= 16; n++) {
34179 GemmMicrokernelTester()
34180 .mr(4)
34181 .nr(16)
34182 .kr(8)
34183 .sr(1)
34184 .m(m)
34185 .n(n)
34186 .k(8)
34187 .iterations(1)
34188 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34189 }
34190 }
34191 }
34192
34193 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_subtile_m) {
34194 TEST_REQUIRES_X86_AVX512SKX;
34195 for (uint32_t m = 1; m <= 4; m++) {
34196 GemmMicrokernelTester()
34197 .mr(4)
34198 .nr(16)
34199 .kr(8)
34200 .sr(1)
34201 .m(m)
34202 .n(16)
34203 .k(8)
34204 .iterations(1)
34205 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34206 }
34207 }
34208
34209 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_subtile_n) {
34210 TEST_REQUIRES_X86_AVX512SKX;
34211 for (uint32_t n = 1; n <= 16; n++) {
34212 GemmMicrokernelTester()
34213 .mr(4)
34214 .nr(16)
34215 .kr(8)
34216 .sr(1)
34217 .m(4)
34218 .n(n)
34219 .k(8)
34220 .iterations(1)
34221 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34222 }
34223 }
34224
34225 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_lt_8) {
34226 TEST_REQUIRES_X86_AVX512SKX;
34227 for (size_t k = 1; k < 8; k++) {
34228 GemmMicrokernelTester()
34229 .mr(4)
34230 .nr(16)
34231 .kr(8)
34232 .sr(1)
34233 .m(4)
34234 .n(16)
34235 .k(k)
34236 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34237 }
34238 }
34239
34240 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_lt_8_subtile) {
34241 TEST_REQUIRES_X86_AVX512SKX;
34242 for (size_t k = 1; k < 8; k++) {
34243 for (uint32_t m = 1; m <= 4; m++) {
34244 for (uint32_t n = 1; n <= 16; n++) {
34245 GemmMicrokernelTester()
34246 .mr(4)
34247 .nr(16)
34248 .kr(8)
34249 .sr(1)
34250 .m(m)
34251 .n(n)
34252 .k(k)
34253 .iterations(1)
34254 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34255 }
34256 }
34257 }
34258 }
34259
34260 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_gt_8) {
34261 TEST_REQUIRES_X86_AVX512SKX;
34262 for (size_t k = 9; k < 16; k++) {
34263 GemmMicrokernelTester()
34264 .mr(4)
34265 .nr(16)
34266 .kr(8)
34267 .sr(1)
34268 .m(4)
34269 .n(16)
34270 .k(k)
34271 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34272 }
34273 }
34274
34275 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_gt_8_subtile) {
34276 TEST_REQUIRES_X86_AVX512SKX;
34277 for (size_t k = 9; k < 16; k++) {
34278 for (uint32_t m = 1; m <= 4; m++) {
34279 for (uint32_t n = 1; n <= 16; n++) {
34280 GemmMicrokernelTester()
34281 .mr(4)
34282 .nr(16)
34283 .kr(8)
34284 .sr(1)
34285 .m(m)
34286 .n(n)
34287 .k(k)
34288 .iterations(1)
34289 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34290 }
34291 }
34292 }
34293 }
34294
34295 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_div_8) {
34296 TEST_REQUIRES_X86_AVX512SKX;
34297 for (size_t k = 16; k <= 80; k += 8) {
34298 GemmMicrokernelTester()
34299 .mr(4)
34300 .nr(16)
34301 .kr(8)
34302 .sr(1)
34303 .m(4)
34304 .n(16)
34305 .k(k)
34306 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34307 }
34308 }
34309
34310 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_div_8_subtile) {
34311 TEST_REQUIRES_X86_AVX512SKX;
34312 for (size_t k = 16; k <= 80; k += 8) {
34313 for (uint32_t m = 1; m <= 4; m++) {
34314 for (uint32_t n = 1; n <= 16; n++) {
34315 GemmMicrokernelTester()
34316 .mr(4)
34317 .nr(16)
34318 .kr(8)
34319 .sr(1)
34320 .m(m)
34321 .n(n)
34322 .k(k)
34323 .iterations(1)
34324 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34325 }
34326 }
34327 }
34328 }
34329
34330 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16) {
34331 TEST_REQUIRES_X86_AVX512SKX;
34332 for (uint32_t n = 17; n < 32; n++) {
34333 for (size_t k = 1; k <= 40; k += 9) {
34334 GemmMicrokernelTester()
34335 .mr(4)
34336 .nr(16)
34337 .kr(8)
34338 .sr(1)
34339 .m(4)
34340 .n(16)
34341 .k(k)
34342 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34343 }
34344 }
34345 }
34346
34347 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16_strided_cn) {
34348 TEST_REQUIRES_X86_AVX512SKX;
34349 for (uint32_t n = 17; n < 32; n++) {
34350 for (size_t k = 1; k <= 40; k += 9) {
34351 GemmMicrokernelTester()
34352 .mr(4)
34353 .nr(16)
34354 .kr(8)
34355 .sr(1)
34356 .m(4)
34357 .n(16)
34358 .k(k)
34359 .cn_stride(19)
34360 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34361 }
34362 }
34363 }
34364
34365 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16_subtile) {
34366 TEST_REQUIRES_X86_AVX512SKX;
34367 for (uint32_t n = 17; n < 32; n++) {
34368 for (size_t k = 1; k <= 40; k += 9) {
34369 for (uint32_t m = 1; m <= 4; m++) {
34370 GemmMicrokernelTester()
34371 .mr(4)
34372 .nr(16)
34373 .kr(8)
34374 .sr(1)
34375 .m(m)
34376 .n(n)
34377 .k(k)
34378 .iterations(1)
34379 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34380 }
34381 }
34382 }
34383 }
34384
34385 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16) {
34386 TEST_REQUIRES_X86_AVX512SKX;
34387 for (uint32_t n = 32; n <= 48; n += 16) {
34388 for (size_t k = 1; k <= 40; k += 9) {
34389 GemmMicrokernelTester()
34390 .mr(4)
34391 .nr(16)
34392 .kr(8)
34393 .sr(1)
34394 .m(4)
34395 .n(16)
34396 .k(k)
34397 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34398 }
34399 }
34400 }
34401
34402 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16_strided_cn) {
34403 TEST_REQUIRES_X86_AVX512SKX;
34404 for (uint32_t n = 32; n <= 48; n += 16) {
34405 for (size_t k = 1; k <= 40; k += 9) {
34406 GemmMicrokernelTester()
34407 .mr(4)
34408 .nr(16)
34409 .kr(8)
34410 .sr(1)
34411 .m(4)
34412 .n(n)
34413 .k(k)
34414 .cn_stride(19)
34415 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34416 }
34417 }
34418 }
34419
34420 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16_subtile) {
34421 TEST_REQUIRES_X86_AVX512SKX;
34422 for (uint32_t n = 32; n <= 48; n += 16) {
34423 for (size_t k = 1; k <= 40; k += 9) {
34424 for (uint32_t m = 1; m <= 4; m++) {
34425 GemmMicrokernelTester()
34426 .mr(4)
34427 .nr(16)
34428 .kr(8)
34429 .sr(1)
34430 .m(m)
34431 .n(n)
34432 .k(k)
34433 .iterations(1)
34434 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34435 }
34436 }
34437 }
34438 }
34439
34440 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, small_kernel) {
34441 TEST_REQUIRES_X86_AVX512SKX;
34442 for (size_t k = 1; k <= 40; k += 9) {
34443 GemmMicrokernelTester()
34444 .mr(4)
34445 .nr(16)
34446 .kr(8)
34447 .sr(1)
34448 .m(4)
34449 .n(16)
34450 .k(k)
34451 .ks(3)
34452 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34453 }
34454 }
34455
34456 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, small_kernel_subtile) {
34457 TEST_REQUIRES_X86_AVX512SKX;
34458 for (size_t k = 1; k <= 40; k += 9) {
34459 for (uint32_t m = 1; m <= 4; m++) {
34460 for (uint32_t n = 1; n <= 16; n++) {
34461 GemmMicrokernelTester()
34462 .mr(4)
34463 .nr(16)
34464 .kr(8)
34465 .sr(1)
34466 .m(m)
34467 .n(n)
34468 .k(k)
34469 .ks(3)
34470 .iterations(1)
34471 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34472 }
34473 }
34474 }
34475 }
34476
34477 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16_small_kernel) {
34478 TEST_REQUIRES_X86_AVX512SKX;
34479 for (uint32_t n = 17; n < 32; n++) {
34480 for (size_t k = 1; k <= 40; k += 9) {
34481 GemmMicrokernelTester()
34482 .mr(4)
34483 .nr(16)
34484 .kr(8)
34485 .sr(1)
34486 .m(4)
34487 .n(16)
34488 .k(k)
34489 .ks(3)
34490 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34491 }
34492 }
34493 }
34494
34495 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16_small_kernel) {
34496 TEST_REQUIRES_X86_AVX512SKX;
34497 for (uint32_t n = 32; n <= 48; n += 16) {
34498 for (size_t k = 1; k <= 40; k += 9) {
34499 GemmMicrokernelTester()
34500 .mr(4)
34501 .nr(16)
34502 .kr(8)
34503 .sr(1)
34504 .m(4)
34505 .n(16)
34506 .k(k)
34507 .ks(3)
34508 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34509 }
34510 }
34511 }
34512
34513 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, strided_cm_subtile) {
34514 TEST_REQUIRES_X86_AVX512SKX;
34515 for (size_t k = 1; k <= 40; k += 9) {
34516 for (uint32_t m = 1; m <= 4; m++) {
34517 for (uint32_t n = 1; n <= 16; n++) {
34518 GemmMicrokernelTester()
34519 .mr(4)
34520 .nr(16)
34521 .kr(8)
34522 .sr(1)
34523 .m(m)
34524 .n(n)
34525 .k(k)
34526 .cm_stride(19)
34527 .iterations(1)
34528 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34529 }
34530 }
34531 }
34532 }
34533
34534 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, a_offset) {
34535 TEST_REQUIRES_X86_AVX512SKX;
34536 for (size_t k = 1; k <= 40; k += 9) {
34537 GemmMicrokernelTester()
34538 .mr(4)
34539 .nr(16)
34540 .kr(8)
34541 .sr(1)
34542 .m(4)
34543 .n(16)
34544 .k(k)
34545 .ks(3)
34546 .a_offset(163)
34547 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34548 }
34549 }
34550
34551 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, zero) {
34552 TEST_REQUIRES_X86_AVX512SKX;
34553 for (uint32_t mz = 0; mz < 4; mz++) {
34554 for (size_t k = 1; k <= 40; k += 9) {
34555 GemmMicrokernelTester()
34556 .mr(4)
34557 .nr(16)
34558 .kr(8)
34559 .sr(1)
34560 .m(4)
34561 .n(16)
34562 .k(k)
34563 .ks(3)
34564 .a_offset(163)
34565 .zero_index(mz)
34566 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34567 }
34568 }
34569 }
34570
34571 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, qmin) {
34572 TEST_REQUIRES_X86_AVX512SKX;
34573 GemmMicrokernelTester()
34574 .mr(4)
34575 .nr(16)
34576 .kr(8)
34577 .sr(1)
34578 .m(4)
34579 .n(16)
34580 .k(8)
34581 .qmin(128)
34582 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34583 }
34584
34585 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, qmax) {
34586 TEST_REQUIRES_X86_AVX512SKX;
34587 GemmMicrokernelTester()
34588 .mr(4)
34589 .nr(16)
34590 .kr(8)
34591 .sr(1)
34592 .m(4)
34593 .n(16)
34594 .k(8)
34595 .qmax(128)
34596 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34597 }
34598
34599 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, strided_cm) {
34600 TEST_REQUIRES_X86_AVX512SKX;
34601 GemmMicrokernelTester()
34602 .mr(4)
34603 .nr(16)
34604 .kr(8)
34605 .sr(1)
34606 .m(4)
34607 .n(16)
34608 .k(8)
34609 .cm_stride(19)
34610 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34611 }
34612
34613 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, no_a_zero_point) {
34614 TEST_REQUIRES_X86_AVX512SKX;
34615 for (size_t k = 1; k <= 40; k += 9) {
34616 GemmMicrokernelTester()
34617 .mr(4)
34618 .nr(16)
34619 .kr(8)
34620 .sr(1)
34621 .m(4)
34622 .n(16)
34623 .k(k)
34624 .a_zero_point(0)
34625 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34626 }
34627 }
34628
34629 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, no_b_zero_point) {
34630 TEST_REQUIRES_X86_AVX512SKX;
34631 for (size_t k = 1; k <= 40; k += 9) {
34632 GemmMicrokernelTester()
34633 .mr(4)
34634 .nr(16)
34635 .kr(8)
34636 .sr(1)
34637 .m(4)
34638 .n(16)
34639 .k(k)
34640 .b_zero_point(0)
34641 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34642 }
34643 }
34644
34645 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, no_zero_point) {
34646 TEST_REQUIRES_X86_AVX512SKX;
34647 for (size_t k = 1; k <= 40; k += 9) {
34648 GemmMicrokernelTester()
34649 .mr(4)
34650 .nr(16)
34651 .kr(8)
34652 .sr(1)
34653 .m(4)
34654 .n(16)
34655 .k(k)
34656 .a_zero_point(0)
34657 .b_zero_point(0)
34658 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34659 }
34660 }
34661#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan43bee052021-07-14 20:57:18 -070034662
34663
34664#if XNN_ARCH_WASMSIMD
34665 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, k_eq_8) {
34666 GemmMicrokernelTester()
34667 .mr(1)
34668 .nr(4)
34669 .kr(8)
34670 .sr(1)
34671 .m(1)
34672 .n(4)
34673 .k(8)
34674 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34675 }
34676
34677 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, strided_cn) {
34678 GemmMicrokernelTester()
34679 .mr(1)
34680 .nr(4)
34681 .kr(8)
34682 .sr(1)
34683 .m(1)
34684 .n(4)
34685 .k(8)
34686 .cn_stride(7)
34687 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34688 }
34689
34690 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, k_eq_8_subtile) {
34691 for (uint32_t m = 1; m <= 1; m++) {
34692 for (uint32_t n = 1; n <= 4; n++) {
34693 GemmMicrokernelTester()
34694 .mr(1)
34695 .nr(4)
34696 .kr(8)
34697 .sr(1)
34698 .m(m)
34699 .n(n)
34700 .k(8)
34701 .iterations(1)
34702 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34703 }
34704 }
34705 }
34706
34707 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, k_eq_8_subtile_m) {
34708 for (uint32_t m = 1; m <= 1; m++) {
34709 GemmMicrokernelTester()
34710 .mr(1)
34711 .nr(4)
34712 .kr(8)
34713 .sr(1)
34714 .m(m)
34715 .n(4)
34716 .k(8)
34717 .iterations(1)
34718 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34719 }
34720 }
34721
34722 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, k_eq_8_subtile_n) {
34723 for (uint32_t n = 1; n <= 4; n++) {
34724 GemmMicrokernelTester()
34725 .mr(1)
34726 .nr(4)
34727 .kr(8)
34728 .sr(1)
34729 .m(1)
34730 .n(n)
34731 .k(8)
34732 .iterations(1)
34733 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34734 }
34735 }
34736
34737 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, k_lt_8) {
34738 for (size_t k = 1; k < 8; k++) {
34739 GemmMicrokernelTester()
34740 .mr(1)
34741 .nr(4)
34742 .kr(8)
34743 .sr(1)
34744 .m(1)
34745 .n(4)
34746 .k(k)
34747 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34748 }
34749 }
34750
34751 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, k_lt_8_subtile) {
34752 for (size_t k = 1; k < 8; k++) {
34753 for (uint32_t m = 1; m <= 1; m++) {
34754 for (uint32_t n = 1; n <= 4; n++) {
34755 GemmMicrokernelTester()
34756 .mr(1)
34757 .nr(4)
34758 .kr(8)
34759 .sr(1)
34760 .m(m)
34761 .n(n)
34762 .k(k)
34763 .iterations(1)
34764 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34765 }
34766 }
34767 }
34768 }
34769
34770 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, k_gt_8) {
34771 for (size_t k = 9; k < 16; k++) {
34772 GemmMicrokernelTester()
34773 .mr(1)
34774 .nr(4)
34775 .kr(8)
34776 .sr(1)
34777 .m(1)
34778 .n(4)
34779 .k(k)
34780 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34781 }
34782 }
34783
34784 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, k_gt_8_subtile) {
34785 for (size_t k = 9; k < 16; k++) {
34786 for (uint32_t m = 1; m <= 1; m++) {
34787 for (uint32_t n = 1; n <= 4; n++) {
34788 GemmMicrokernelTester()
34789 .mr(1)
34790 .nr(4)
34791 .kr(8)
34792 .sr(1)
34793 .m(m)
34794 .n(n)
34795 .k(k)
34796 .iterations(1)
34797 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34798 }
34799 }
34800 }
34801 }
34802
34803 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, k_div_8) {
34804 for (size_t k = 16; k <= 80; k += 8) {
34805 GemmMicrokernelTester()
34806 .mr(1)
34807 .nr(4)
34808 .kr(8)
34809 .sr(1)
34810 .m(1)
34811 .n(4)
34812 .k(k)
34813 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34814 }
34815 }
34816
34817 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, k_div_8_subtile) {
34818 for (size_t k = 16; k <= 80; k += 8) {
34819 for (uint32_t m = 1; m <= 1; m++) {
34820 for (uint32_t n = 1; n <= 4; n++) {
34821 GemmMicrokernelTester()
34822 .mr(1)
34823 .nr(4)
34824 .kr(8)
34825 .sr(1)
34826 .m(m)
34827 .n(n)
34828 .k(k)
34829 .iterations(1)
34830 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34831 }
34832 }
34833 }
34834 }
34835
34836 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, n_gt_4) {
34837 for (uint32_t n = 5; n < 8; n++) {
34838 for (size_t k = 1; k <= 40; k += 9) {
34839 GemmMicrokernelTester()
34840 .mr(1)
34841 .nr(4)
34842 .kr(8)
34843 .sr(1)
34844 .m(1)
34845 .n(4)
34846 .k(k)
34847 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34848 }
34849 }
34850 }
34851
34852 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, n_gt_4_strided_cn) {
34853 for (uint32_t n = 5; n < 8; n++) {
34854 for (size_t k = 1; k <= 40; k += 9) {
34855 GemmMicrokernelTester()
34856 .mr(1)
34857 .nr(4)
34858 .kr(8)
34859 .sr(1)
34860 .m(1)
34861 .n(4)
34862 .k(k)
34863 .cn_stride(7)
34864 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34865 }
34866 }
34867 }
34868
34869 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, n_gt_4_subtile) {
34870 for (uint32_t n = 5; n < 8; n++) {
34871 for (size_t k = 1; k <= 40; k += 9) {
34872 for (uint32_t m = 1; m <= 1; m++) {
34873 GemmMicrokernelTester()
34874 .mr(1)
34875 .nr(4)
34876 .kr(8)
34877 .sr(1)
34878 .m(m)
34879 .n(n)
34880 .k(k)
34881 .iterations(1)
34882 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34883 }
34884 }
34885 }
34886 }
34887
34888 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, n_div_4) {
34889 for (uint32_t n = 8; n <= 12; n += 4) {
34890 for (size_t k = 1; k <= 40; k += 9) {
34891 GemmMicrokernelTester()
34892 .mr(1)
34893 .nr(4)
34894 .kr(8)
34895 .sr(1)
34896 .m(1)
34897 .n(4)
34898 .k(k)
34899 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34900 }
34901 }
34902 }
34903
34904 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, n_div_4_strided_cn) {
34905 for (uint32_t n = 8; n <= 12; n += 4) {
34906 for (size_t k = 1; k <= 40; k += 9) {
34907 GemmMicrokernelTester()
34908 .mr(1)
34909 .nr(4)
34910 .kr(8)
34911 .sr(1)
34912 .m(1)
34913 .n(n)
34914 .k(k)
34915 .cn_stride(7)
34916 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34917 }
34918 }
34919 }
34920
34921 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, n_div_4_subtile) {
34922 for (uint32_t n = 8; n <= 12; n += 4) {
34923 for (size_t k = 1; k <= 40; k += 9) {
34924 for (uint32_t m = 1; m <= 1; m++) {
34925 GemmMicrokernelTester()
34926 .mr(1)
34927 .nr(4)
34928 .kr(8)
34929 .sr(1)
34930 .m(m)
34931 .n(n)
34932 .k(k)
34933 .iterations(1)
34934 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34935 }
34936 }
34937 }
34938 }
34939
34940 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, small_kernel) {
34941 for (size_t k = 1; k <= 40; k += 9) {
34942 GemmMicrokernelTester()
34943 .mr(1)
34944 .nr(4)
34945 .kr(8)
34946 .sr(1)
34947 .m(1)
34948 .n(4)
34949 .k(k)
34950 .ks(3)
34951 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34952 }
34953 }
34954
34955 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, small_kernel_subtile) {
34956 for (size_t k = 1; k <= 40; k += 9) {
34957 for (uint32_t m = 1; m <= 1; m++) {
34958 for (uint32_t n = 1; n <= 4; n++) {
34959 GemmMicrokernelTester()
34960 .mr(1)
34961 .nr(4)
34962 .kr(8)
34963 .sr(1)
34964 .m(m)
34965 .n(n)
34966 .k(k)
34967 .ks(3)
34968 .iterations(1)
34969 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34970 }
34971 }
34972 }
34973 }
34974
34975 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, n_gt_4_small_kernel) {
34976 for (uint32_t n = 5; n < 8; n++) {
34977 for (size_t k = 1; k <= 40; k += 9) {
34978 GemmMicrokernelTester()
34979 .mr(1)
34980 .nr(4)
34981 .kr(8)
34982 .sr(1)
34983 .m(1)
34984 .n(4)
34985 .k(k)
34986 .ks(3)
34987 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
34988 }
34989 }
34990 }
34991
34992 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, n_div_4_small_kernel) {
34993 for (uint32_t n = 8; n <= 12; n += 4) {
34994 for (size_t k = 1; k <= 40; k += 9) {
34995 GemmMicrokernelTester()
34996 .mr(1)
34997 .nr(4)
34998 .kr(8)
34999 .sr(1)
35000 .m(1)
35001 .n(4)
35002 .k(k)
35003 .ks(3)
35004 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35005 }
35006 }
35007 }
35008
35009 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, strided_cm_subtile) {
35010 for (size_t k = 1; k <= 40; k += 9) {
35011 for (uint32_t m = 1; m <= 1; m++) {
35012 for (uint32_t n = 1; n <= 4; n++) {
35013 GemmMicrokernelTester()
35014 .mr(1)
35015 .nr(4)
35016 .kr(8)
35017 .sr(1)
35018 .m(m)
35019 .n(n)
35020 .k(k)
35021 .cm_stride(7)
35022 .iterations(1)
35023 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35024 }
35025 }
35026 }
35027 }
35028
35029 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, a_offset) {
35030 for (size_t k = 1; k <= 40; k += 9) {
35031 GemmMicrokernelTester()
35032 .mr(1)
35033 .nr(4)
35034 .kr(8)
35035 .sr(1)
35036 .m(1)
35037 .n(4)
35038 .k(k)
35039 .ks(3)
35040 .a_offset(43)
35041 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35042 }
35043 }
35044
35045 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, zero) {
35046 for (uint32_t mz = 0; mz < 1; mz++) {
35047 for (size_t k = 1; k <= 40; k += 9) {
35048 GemmMicrokernelTester()
35049 .mr(1)
35050 .nr(4)
35051 .kr(8)
35052 .sr(1)
35053 .m(1)
35054 .n(4)
35055 .k(k)
35056 .ks(3)
35057 .a_offset(43)
35058 .zero_index(mz)
35059 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35060 }
35061 }
35062 }
35063
35064 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, qmin) {
35065 GemmMicrokernelTester()
35066 .mr(1)
35067 .nr(4)
35068 .kr(8)
35069 .sr(1)
35070 .m(1)
35071 .n(4)
35072 .k(8)
35073 .qmin(128)
35074 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35075 }
35076
35077 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, qmax) {
35078 GemmMicrokernelTester()
35079 .mr(1)
35080 .nr(4)
35081 .kr(8)
35082 .sr(1)
35083 .m(1)
35084 .n(4)
35085 .k(8)
35086 .qmax(128)
35087 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35088 }
35089
35090 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, strided_cm) {
35091 GemmMicrokernelTester()
35092 .mr(1)
35093 .nr(4)
35094 .kr(8)
35095 .sr(1)
35096 .m(1)
35097 .n(4)
35098 .k(8)
35099 .cm_stride(7)
35100 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35101 }
35102
35103 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, no_a_zero_point) {
35104 for (size_t k = 1; k <= 40; k += 9) {
35105 GemmMicrokernelTester()
35106 .mr(1)
35107 .nr(4)
35108 .kr(8)
35109 .sr(1)
35110 .m(1)
35111 .n(4)
35112 .k(k)
35113 .a_zero_point(0)
35114 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35115 }
35116 }
35117
35118 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, no_b_zero_point) {
35119 for (size_t k = 1; k <= 40; k += 9) {
35120 GemmMicrokernelTester()
35121 .mr(1)
35122 .nr(4)
35123 .kr(8)
35124 .sr(1)
35125 .m(1)
35126 .n(4)
35127 .k(k)
35128 .b_zero_point(0)
35129 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35130 }
35131 }
35132
35133 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD64, no_zero_point) {
35134 for (size_t k = 1; k <= 40; k += 9) {
35135 GemmMicrokernelTester()
35136 .mr(1)
35137 .nr(4)
35138 .kr(8)
35139 .sr(1)
35140 .m(1)
35141 .n(4)
35142 .k(k)
35143 .a_zero_point(0)
35144 .b_zero_point(0)
35145 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35146 }
35147 }
35148#endif // XNN_ARCH_WASMSIMD
35149
35150
35151#if XNN_ARCH_WASMSIMD
35152 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, k_eq_8) {
35153 GemmMicrokernelTester()
35154 .mr(2)
35155 .nr(4)
35156 .kr(8)
35157 .sr(1)
35158 .m(2)
35159 .n(4)
35160 .k(8)
35161 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35162 }
35163
35164 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, strided_cn) {
35165 GemmMicrokernelTester()
35166 .mr(2)
35167 .nr(4)
35168 .kr(8)
35169 .sr(1)
35170 .m(2)
35171 .n(4)
35172 .k(8)
35173 .cn_stride(7)
35174 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35175 }
35176
35177 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, k_eq_8_subtile) {
35178 for (uint32_t m = 1; m <= 2; m++) {
35179 for (uint32_t n = 1; n <= 4; n++) {
35180 GemmMicrokernelTester()
35181 .mr(2)
35182 .nr(4)
35183 .kr(8)
35184 .sr(1)
35185 .m(m)
35186 .n(n)
35187 .k(8)
35188 .iterations(1)
35189 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35190 }
35191 }
35192 }
35193
35194 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, k_eq_8_subtile_m) {
35195 for (uint32_t m = 1; m <= 2; m++) {
35196 GemmMicrokernelTester()
35197 .mr(2)
35198 .nr(4)
35199 .kr(8)
35200 .sr(1)
35201 .m(m)
35202 .n(4)
35203 .k(8)
35204 .iterations(1)
35205 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35206 }
35207 }
35208
35209 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, k_eq_8_subtile_n) {
35210 for (uint32_t n = 1; n <= 4; n++) {
35211 GemmMicrokernelTester()
35212 .mr(2)
35213 .nr(4)
35214 .kr(8)
35215 .sr(1)
35216 .m(2)
35217 .n(n)
35218 .k(8)
35219 .iterations(1)
35220 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35221 }
35222 }
35223
35224 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, k_lt_8) {
35225 for (size_t k = 1; k < 8; k++) {
35226 GemmMicrokernelTester()
35227 .mr(2)
35228 .nr(4)
35229 .kr(8)
35230 .sr(1)
35231 .m(2)
35232 .n(4)
35233 .k(k)
35234 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35235 }
35236 }
35237
35238 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, k_lt_8_subtile) {
35239 for (size_t k = 1; k < 8; k++) {
35240 for (uint32_t m = 1; m <= 2; m++) {
35241 for (uint32_t n = 1; n <= 4; n++) {
35242 GemmMicrokernelTester()
35243 .mr(2)
35244 .nr(4)
35245 .kr(8)
35246 .sr(1)
35247 .m(m)
35248 .n(n)
35249 .k(k)
35250 .iterations(1)
35251 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35252 }
35253 }
35254 }
35255 }
35256
35257 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, k_gt_8) {
35258 for (size_t k = 9; k < 16; k++) {
35259 GemmMicrokernelTester()
35260 .mr(2)
35261 .nr(4)
35262 .kr(8)
35263 .sr(1)
35264 .m(2)
35265 .n(4)
35266 .k(k)
35267 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35268 }
35269 }
35270
35271 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, k_gt_8_subtile) {
35272 for (size_t k = 9; k < 16; k++) {
35273 for (uint32_t m = 1; m <= 2; m++) {
35274 for (uint32_t n = 1; n <= 4; n++) {
35275 GemmMicrokernelTester()
35276 .mr(2)
35277 .nr(4)
35278 .kr(8)
35279 .sr(1)
35280 .m(m)
35281 .n(n)
35282 .k(k)
35283 .iterations(1)
35284 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35285 }
35286 }
35287 }
35288 }
35289
35290 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, k_div_8) {
35291 for (size_t k = 16; k <= 80; k += 8) {
35292 GemmMicrokernelTester()
35293 .mr(2)
35294 .nr(4)
35295 .kr(8)
35296 .sr(1)
35297 .m(2)
35298 .n(4)
35299 .k(k)
35300 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35301 }
35302 }
35303
35304 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, k_div_8_subtile) {
35305 for (size_t k = 16; k <= 80; k += 8) {
35306 for (uint32_t m = 1; m <= 2; m++) {
35307 for (uint32_t n = 1; n <= 4; n++) {
35308 GemmMicrokernelTester()
35309 .mr(2)
35310 .nr(4)
35311 .kr(8)
35312 .sr(1)
35313 .m(m)
35314 .n(n)
35315 .k(k)
35316 .iterations(1)
35317 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35318 }
35319 }
35320 }
35321 }
35322
35323 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, n_gt_4) {
35324 for (uint32_t n = 5; n < 8; n++) {
35325 for (size_t k = 1; k <= 40; k += 9) {
35326 GemmMicrokernelTester()
35327 .mr(2)
35328 .nr(4)
35329 .kr(8)
35330 .sr(1)
35331 .m(2)
35332 .n(4)
35333 .k(k)
35334 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35335 }
35336 }
35337 }
35338
35339 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, n_gt_4_strided_cn) {
35340 for (uint32_t n = 5; n < 8; n++) {
35341 for (size_t k = 1; k <= 40; k += 9) {
35342 GemmMicrokernelTester()
35343 .mr(2)
35344 .nr(4)
35345 .kr(8)
35346 .sr(1)
35347 .m(2)
35348 .n(4)
35349 .k(k)
35350 .cn_stride(7)
35351 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35352 }
35353 }
35354 }
35355
35356 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, n_gt_4_subtile) {
35357 for (uint32_t n = 5; n < 8; n++) {
35358 for (size_t k = 1; k <= 40; k += 9) {
35359 for (uint32_t m = 1; m <= 2; m++) {
35360 GemmMicrokernelTester()
35361 .mr(2)
35362 .nr(4)
35363 .kr(8)
35364 .sr(1)
35365 .m(m)
35366 .n(n)
35367 .k(k)
35368 .iterations(1)
35369 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35370 }
35371 }
35372 }
35373 }
35374
35375 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, n_div_4) {
35376 for (uint32_t n = 8; n <= 12; n += 4) {
35377 for (size_t k = 1; k <= 40; k += 9) {
35378 GemmMicrokernelTester()
35379 .mr(2)
35380 .nr(4)
35381 .kr(8)
35382 .sr(1)
35383 .m(2)
35384 .n(4)
35385 .k(k)
35386 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35387 }
35388 }
35389 }
35390
35391 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, n_div_4_strided_cn) {
35392 for (uint32_t n = 8; n <= 12; n += 4) {
35393 for (size_t k = 1; k <= 40; k += 9) {
35394 GemmMicrokernelTester()
35395 .mr(2)
35396 .nr(4)
35397 .kr(8)
35398 .sr(1)
35399 .m(2)
35400 .n(n)
35401 .k(k)
35402 .cn_stride(7)
35403 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35404 }
35405 }
35406 }
35407
35408 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, n_div_4_subtile) {
35409 for (uint32_t n = 8; n <= 12; n += 4) {
35410 for (size_t k = 1; k <= 40; k += 9) {
35411 for (uint32_t m = 1; m <= 2; m++) {
35412 GemmMicrokernelTester()
35413 .mr(2)
35414 .nr(4)
35415 .kr(8)
35416 .sr(1)
35417 .m(m)
35418 .n(n)
35419 .k(k)
35420 .iterations(1)
35421 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35422 }
35423 }
35424 }
35425 }
35426
35427 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, small_kernel) {
35428 for (size_t k = 1; k <= 40; k += 9) {
35429 GemmMicrokernelTester()
35430 .mr(2)
35431 .nr(4)
35432 .kr(8)
35433 .sr(1)
35434 .m(2)
35435 .n(4)
35436 .k(k)
35437 .ks(3)
35438 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35439 }
35440 }
35441
35442 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, small_kernel_subtile) {
35443 for (size_t k = 1; k <= 40; k += 9) {
35444 for (uint32_t m = 1; m <= 2; m++) {
35445 for (uint32_t n = 1; n <= 4; n++) {
35446 GemmMicrokernelTester()
35447 .mr(2)
35448 .nr(4)
35449 .kr(8)
35450 .sr(1)
35451 .m(m)
35452 .n(n)
35453 .k(k)
35454 .ks(3)
35455 .iterations(1)
35456 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35457 }
35458 }
35459 }
35460 }
35461
35462 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, n_gt_4_small_kernel) {
35463 for (uint32_t n = 5; n < 8; n++) {
35464 for (size_t k = 1; k <= 40; k += 9) {
35465 GemmMicrokernelTester()
35466 .mr(2)
35467 .nr(4)
35468 .kr(8)
35469 .sr(1)
35470 .m(2)
35471 .n(4)
35472 .k(k)
35473 .ks(3)
35474 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35475 }
35476 }
35477 }
35478
35479 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, n_div_4_small_kernel) {
35480 for (uint32_t n = 8; n <= 12; n += 4) {
35481 for (size_t k = 1; k <= 40; k += 9) {
35482 GemmMicrokernelTester()
35483 .mr(2)
35484 .nr(4)
35485 .kr(8)
35486 .sr(1)
35487 .m(2)
35488 .n(4)
35489 .k(k)
35490 .ks(3)
35491 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35492 }
35493 }
35494 }
35495
35496 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, strided_cm_subtile) {
35497 for (size_t k = 1; k <= 40; k += 9) {
35498 for (uint32_t m = 1; m <= 2; m++) {
35499 for (uint32_t n = 1; n <= 4; n++) {
35500 GemmMicrokernelTester()
35501 .mr(2)
35502 .nr(4)
35503 .kr(8)
35504 .sr(1)
35505 .m(m)
35506 .n(n)
35507 .k(k)
35508 .cm_stride(7)
35509 .iterations(1)
35510 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35511 }
35512 }
35513 }
35514 }
35515
35516 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, a_offset) {
35517 for (size_t k = 1; k <= 40; k += 9) {
35518 GemmMicrokernelTester()
35519 .mr(2)
35520 .nr(4)
35521 .kr(8)
35522 .sr(1)
35523 .m(2)
35524 .n(4)
35525 .k(k)
35526 .ks(3)
35527 .a_offset(83)
35528 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35529 }
35530 }
35531
35532 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, zero) {
35533 for (uint32_t mz = 0; mz < 2; mz++) {
35534 for (size_t k = 1; k <= 40; k += 9) {
35535 GemmMicrokernelTester()
35536 .mr(2)
35537 .nr(4)
35538 .kr(8)
35539 .sr(1)
35540 .m(2)
35541 .n(4)
35542 .k(k)
35543 .ks(3)
35544 .a_offset(83)
35545 .zero_index(mz)
35546 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35547 }
35548 }
35549 }
35550
35551 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, qmin) {
35552 GemmMicrokernelTester()
35553 .mr(2)
35554 .nr(4)
35555 .kr(8)
35556 .sr(1)
35557 .m(2)
35558 .n(4)
35559 .k(8)
35560 .qmin(128)
35561 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35562 }
35563
35564 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, qmax) {
35565 GemmMicrokernelTester()
35566 .mr(2)
35567 .nr(4)
35568 .kr(8)
35569 .sr(1)
35570 .m(2)
35571 .n(4)
35572 .k(8)
35573 .qmax(128)
35574 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35575 }
35576
35577 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, strided_cm) {
35578 GemmMicrokernelTester()
35579 .mr(2)
35580 .nr(4)
35581 .kr(8)
35582 .sr(1)
35583 .m(2)
35584 .n(4)
35585 .k(8)
35586 .cm_stride(7)
35587 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35588 }
35589
35590 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, no_a_zero_point) {
35591 for (size_t k = 1; k <= 40; k += 9) {
35592 GemmMicrokernelTester()
35593 .mr(2)
35594 .nr(4)
35595 .kr(8)
35596 .sr(1)
35597 .m(2)
35598 .n(4)
35599 .k(k)
35600 .a_zero_point(0)
35601 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35602 }
35603 }
35604
35605 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, no_b_zero_point) {
35606 for (size_t k = 1; k <= 40; k += 9) {
35607 GemmMicrokernelTester()
35608 .mr(2)
35609 .nr(4)
35610 .kr(8)
35611 .sr(1)
35612 .m(2)
35613 .n(4)
35614 .k(k)
35615 .b_zero_point(0)
35616 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35617 }
35618 }
35619
35620 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD64, no_zero_point) {
35621 for (size_t k = 1; k <= 40; k += 9) {
35622 GemmMicrokernelTester()
35623 .mr(2)
35624 .nr(4)
35625 .kr(8)
35626 .sr(1)
35627 .m(2)
35628 .n(4)
35629 .k(k)
35630 .a_zero_point(0)
35631 .b_zero_point(0)
35632 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35633 }
35634 }
35635#endif // XNN_ARCH_WASMSIMD
35636
35637
35638#if XNN_ARCH_WASMSIMD
35639 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, k_eq_8) {
35640 GemmMicrokernelTester()
35641 .mr(3)
35642 .nr(4)
35643 .kr(8)
35644 .sr(1)
35645 .m(3)
35646 .n(4)
35647 .k(8)
35648 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35649 }
35650
35651 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, strided_cn) {
35652 GemmMicrokernelTester()
35653 .mr(3)
35654 .nr(4)
35655 .kr(8)
35656 .sr(1)
35657 .m(3)
35658 .n(4)
35659 .k(8)
35660 .cn_stride(7)
35661 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35662 }
35663
35664 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, k_eq_8_subtile) {
35665 for (uint32_t m = 1; m <= 3; m++) {
35666 for (uint32_t n = 1; n <= 4; n++) {
35667 GemmMicrokernelTester()
35668 .mr(3)
35669 .nr(4)
35670 .kr(8)
35671 .sr(1)
35672 .m(m)
35673 .n(n)
35674 .k(8)
35675 .iterations(1)
35676 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35677 }
35678 }
35679 }
35680
35681 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, k_eq_8_subtile_m) {
35682 for (uint32_t m = 1; m <= 3; m++) {
35683 GemmMicrokernelTester()
35684 .mr(3)
35685 .nr(4)
35686 .kr(8)
35687 .sr(1)
35688 .m(m)
35689 .n(4)
35690 .k(8)
35691 .iterations(1)
35692 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35693 }
35694 }
35695
35696 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, k_eq_8_subtile_n) {
35697 for (uint32_t n = 1; n <= 4; n++) {
35698 GemmMicrokernelTester()
35699 .mr(3)
35700 .nr(4)
35701 .kr(8)
35702 .sr(1)
35703 .m(3)
35704 .n(n)
35705 .k(8)
35706 .iterations(1)
35707 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35708 }
35709 }
35710
35711 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, k_lt_8) {
35712 for (size_t k = 1; k < 8; k++) {
35713 GemmMicrokernelTester()
35714 .mr(3)
35715 .nr(4)
35716 .kr(8)
35717 .sr(1)
35718 .m(3)
35719 .n(4)
35720 .k(k)
35721 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35722 }
35723 }
35724
35725 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, k_lt_8_subtile) {
35726 for (size_t k = 1; k < 8; k++) {
35727 for (uint32_t m = 1; m <= 3; m++) {
35728 for (uint32_t n = 1; n <= 4; n++) {
35729 GemmMicrokernelTester()
35730 .mr(3)
35731 .nr(4)
35732 .kr(8)
35733 .sr(1)
35734 .m(m)
35735 .n(n)
35736 .k(k)
35737 .iterations(1)
35738 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35739 }
35740 }
35741 }
35742 }
35743
35744 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, k_gt_8) {
35745 for (size_t k = 9; k < 16; k++) {
35746 GemmMicrokernelTester()
35747 .mr(3)
35748 .nr(4)
35749 .kr(8)
35750 .sr(1)
35751 .m(3)
35752 .n(4)
35753 .k(k)
35754 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35755 }
35756 }
35757
35758 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, k_gt_8_subtile) {
35759 for (size_t k = 9; k < 16; k++) {
35760 for (uint32_t m = 1; m <= 3; m++) {
35761 for (uint32_t n = 1; n <= 4; n++) {
35762 GemmMicrokernelTester()
35763 .mr(3)
35764 .nr(4)
35765 .kr(8)
35766 .sr(1)
35767 .m(m)
35768 .n(n)
35769 .k(k)
35770 .iterations(1)
35771 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35772 }
35773 }
35774 }
35775 }
35776
35777 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, k_div_8) {
35778 for (size_t k = 16; k <= 80; k += 8) {
35779 GemmMicrokernelTester()
35780 .mr(3)
35781 .nr(4)
35782 .kr(8)
35783 .sr(1)
35784 .m(3)
35785 .n(4)
35786 .k(k)
35787 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35788 }
35789 }
35790
35791 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, k_div_8_subtile) {
35792 for (size_t k = 16; k <= 80; k += 8) {
35793 for (uint32_t m = 1; m <= 3; m++) {
35794 for (uint32_t n = 1; n <= 4; n++) {
35795 GemmMicrokernelTester()
35796 .mr(3)
35797 .nr(4)
35798 .kr(8)
35799 .sr(1)
35800 .m(m)
35801 .n(n)
35802 .k(k)
35803 .iterations(1)
35804 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35805 }
35806 }
35807 }
35808 }
35809
35810 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, n_gt_4) {
35811 for (uint32_t n = 5; n < 8; n++) {
35812 for (size_t k = 1; k <= 40; k += 9) {
35813 GemmMicrokernelTester()
35814 .mr(3)
35815 .nr(4)
35816 .kr(8)
35817 .sr(1)
35818 .m(3)
35819 .n(4)
35820 .k(k)
35821 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35822 }
35823 }
35824 }
35825
35826 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, n_gt_4_strided_cn) {
35827 for (uint32_t n = 5; n < 8; n++) {
35828 for (size_t k = 1; k <= 40; k += 9) {
35829 GemmMicrokernelTester()
35830 .mr(3)
35831 .nr(4)
35832 .kr(8)
35833 .sr(1)
35834 .m(3)
35835 .n(4)
35836 .k(k)
35837 .cn_stride(7)
35838 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35839 }
35840 }
35841 }
35842
35843 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, n_gt_4_subtile) {
35844 for (uint32_t n = 5; n < 8; n++) {
35845 for (size_t k = 1; k <= 40; k += 9) {
35846 for (uint32_t m = 1; m <= 3; m++) {
35847 GemmMicrokernelTester()
35848 .mr(3)
35849 .nr(4)
35850 .kr(8)
35851 .sr(1)
35852 .m(m)
35853 .n(n)
35854 .k(k)
35855 .iterations(1)
35856 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35857 }
35858 }
35859 }
35860 }
35861
35862 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, n_div_4) {
35863 for (uint32_t n = 8; n <= 12; n += 4) {
35864 for (size_t k = 1; k <= 40; k += 9) {
35865 GemmMicrokernelTester()
35866 .mr(3)
35867 .nr(4)
35868 .kr(8)
35869 .sr(1)
35870 .m(3)
35871 .n(4)
35872 .k(k)
35873 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35874 }
35875 }
35876 }
35877
35878 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, n_div_4_strided_cn) {
35879 for (uint32_t n = 8; n <= 12; n += 4) {
35880 for (size_t k = 1; k <= 40; k += 9) {
35881 GemmMicrokernelTester()
35882 .mr(3)
35883 .nr(4)
35884 .kr(8)
35885 .sr(1)
35886 .m(3)
35887 .n(n)
35888 .k(k)
35889 .cn_stride(7)
35890 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35891 }
35892 }
35893 }
35894
35895 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, n_div_4_subtile) {
35896 for (uint32_t n = 8; n <= 12; n += 4) {
35897 for (size_t k = 1; k <= 40; k += 9) {
35898 for (uint32_t m = 1; m <= 3; m++) {
35899 GemmMicrokernelTester()
35900 .mr(3)
35901 .nr(4)
35902 .kr(8)
35903 .sr(1)
35904 .m(m)
35905 .n(n)
35906 .k(k)
35907 .iterations(1)
35908 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35909 }
35910 }
35911 }
35912 }
35913
35914 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, small_kernel) {
35915 for (size_t k = 1; k <= 40; k += 9) {
35916 GemmMicrokernelTester()
35917 .mr(3)
35918 .nr(4)
35919 .kr(8)
35920 .sr(1)
35921 .m(3)
35922 .n(4)
35923 .k(k)
35924 .ks(3)
35925 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35926 }
35927 }
35928
35929 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, small_kernel_subtile) {
35930 for (size_t k = 1; k <= 40; k += 9) {
35931 for (uint32_t m = 1; m <= 3; m++) {
35932 for (uint32_t n = 1; n <= 4; n++) {
35933 GemmMicrokernelTester()
35934 .mr(3)
35935 .nr(4)
35936 .kr(8)
35937 .sr(1)
35938 .m(m)
35939 .n(n)
35940 .k(k)
35941 .ks(3)
35942 .iterations(1)
35943 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35944 }
35945 }
35946 }
35947 }
35948
35949 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, n_gt_4_small_kernel) {
35950 for (uint32_t n = 5; n < 8; n++) {
35951 for (size_t k = 1; k <= 40; k += 9) {
35952 GemmMicrokernelTester()
35953 .mr(3)
35954 .nr(4)
35955 .kr(8)
35956 .sr(1)
35957 .m(3)
35958 .n(4)
35959 .k(k)
35960 .ks(3)
35961 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35962 }
35963 }
35964 }
35965
35966 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, n_div_4_small_kernel) {
35967 for (uint32_t n = 8; n <= 12; n += 4) {
35968 for (size_t k = 1; k <= 40; k += 9) {
35969 GemmMicrokernelTester()
35970 .mr(3)
35971 .nr(4)
35972 .kr(8)
35973 .sr(1)
35974 .m(3)
35975 .n(4)
35976 .k(k)
35977 .ks(3)
35978 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35979 }
35980 }
35981 }
35982
35983 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, strided_cm_subtile) {
35984 for (size_t k = 1; k <= 40; k += 9) {
35985 for (uint32_t m = 1; m <= 3; m++) {
35986 for (uint32_t n = 1; n <= 4; n++) {
35987 GemmMicrokernelTester()
35988 .mr(3)
35989 .nr(4)
35990 .kr(8)
35991 .sr(1)
35992 .m(m)
35993 .n(n)
35994 .k(k)
35995 .cm_stride(7)
35996 .iterations(1)
35997 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
35998 }
35999 }
36000 }
36001 }
36002
36003 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, a_offset) {
36004 for (size_t k = 1; k <= 40; k += 9) {
36005 GemmMicrokernelTester()
36006 .mr(3)
36007 .nr(4)
36008 .kr(8)
36009 .sr(1)
36010 .m(3)
36011 .n(4)
36012 .k(k)
36013 .ks(3)
36014 .a_offset(127)
36015 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36016 }
36017 }
36018
36019 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, zero) {
36020 for (uint32_t mz = 0; mz < 3; mz++) {
36021 for (size_t k = 1; k <= 40; k += 9) {
36022 GemmMicrokernelTester()
36023 .mr(3)
36024 .nr(4)
36025 .kr(8)
36026 .sr(1)
36027 .m(3)
36028 .n(4)
36029 .k(k)
36030 .ks(3)
36031 .a_offset(127)
36032 .zero_index(mz)
36033 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36034 }
36035 }
36036 }
36037
36038 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, qmin) {
36039 GemmMicrokernelTester()
36040 .mr(3)
36041 .nr(4)
36042 .kr(8)
36043 .sr(1)
36044 .m(3)
36045 .n(4)
36046 .k(8)
36047 .qmin(128)
36048 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36049 }
36050
36051 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, qmax) {
36052 GemmMicrokernelTester()
36053 .mr(3)
36054 .nr(4)
36055 .kr(8)
36056 .sr(1)
36057 .m(3)
36058 .n(4)
36059 .k(8)
36060 .qmax(128)
36061 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36062 }
36063
36064 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, strided_cm) {
36065 GemmMicrokernelTester()
36066 .mr(3)
36067 .nr(4)
36068 .kr(8)
36069 .sr(1)
36070 .m(3)
36071 .n(4)
36072 .k(8)
36073 .cm_stride(7)
36074 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36075 }
36076
36077 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, no_a_zero_point) {
36078 for (size_t k = 1; k <= 40; k += 9) {
36079 GemmMicrokernelTester()
36080 .mr(3)
36081 .nr(4)
36082 .kr(8)
36083 .sr(1)
36084 .m(3)
36085 .n(4)
36086 .k(k)
36087 .a_zero_point(0)
36088 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36089 }
36090 }
36091
36092 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, no_b_zero_point) {
36093 for (size_t k = 1; k <= 40; k += 9) {
36094 GemmMicrokernelTester()
36095 .mr(3)
36096 .nr(4)
36097 .kr(8)
36098 .sr(1)
36099 .m(3)
36100 .n(4)
36101 .k(k)
36102 .b_zero_point(0)
36103 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36104 }
36105 }
36106
36107 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD64, no_zero_point) {
36108 for (size_t k = 1; k <= 40; k += 9) {
36109 GemmMicrokernelTester()
36110 .mr(3)
36111 .nr(4)
36112 .kr(8)
36113 .sr(1)
36114 .m(3)
36115 .n(4)
36116 .k(k)
36117 .a_zero_point(0)
36118 .b_zero_point(0)
36119 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36120 }
36121 }
36122#endif // XNN_ARCH_WASMSIMD
36123
36124
36125#if XNN_ARCH_WASMSIMD
36126 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, k_eq_8) {
36127 GemmMicrokernelTester()
36128 .mr(1)
36129 .nr(4)
36130 .kr(8)
36131 .sr(1)
36132 .m(1)
36133 .n(4)
36134 .k(8)
36135 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36136 }
36137
36138 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, strided_cn) {
36139 GemmMicrokernelTester()
36140 .mr(1)
36141 .nr(4)
36142 .kr(8)
36143 .sr(1)
36144 .m(1)
36145 .n(4)
36146 .k(8)
36147 .cn_stride(7)
36148 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36149 }
36150
36151 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, k_eq_8_subtile) {
36152 for (uint32_t m = 1; m <= 1; m++) {
36153 for (uint32_t n = 1; n <= 4; n++) {
36154 GemmMicrokernelTester()
36155 .mr(1)
36156 .nr(4)
36157 .kr(8)
36158 .sr(1)
36159 .m(m)
36160 .n(n)
36161 .k(8)
36162 .iterations(1)
36163 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36164 }
36165 }
36166 }
36167
36168 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, k_eq_8_subtile_m) {
36169 for (uint32_t m = 1; m <= 1; m++) {
36170 GemmMicrokernelTester()
36171 .mr(1)
36172 .nr(4)
36173 .kr(8)
36174 .sr(1)
36175 .m(m)
36176 .n(4)
36177 .k(8)
36178 .iterations(1)
36179 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36180 }
36181 }
36182
36183 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, k_eq_8_subtile_n) {
36184 for (uint32_t n = 1; n <= 4; n++) {
36185 GemmMicrokernelTester()
36186 .mr(1)
36187 .nr(4)
36188 .kr(8)
36189 .sr(1)
36190 .m(1)
36191 .n(n)
36192 .k(8)
36193 .iterations(1)
36194 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36195 }
36196 }
36197
36198 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, k_lt_8) {
36199 for (size_t k = 1; k < 8; k++) {
36200 GemmMicrokernelTester()
36201 .mr(1)
36202 .nr(4)
36203 .kr(8)
36204 .sr(1)
36205 .m(1)
36206 .n(4)
36207 .k(k)
36208 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36209 }
36210 }
36211
36212 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, k_lt_8_subtile) {
36213 for (size_t k = 1; k < 8; k++) {
36214 for (uint32_t m = 1; m <= 1; m++) {
36215 for (uint32_t n = 1; n <= 4; n++) {
36216 GemmMicrokernelTester()
36217 .mr(1)
36218 .nr(4)
36219 .kr(8)
36220 .sr(1)
36221 .m(m)
36222 .n(n)
36223 .k(k)
36224 .iterations(1)
36225 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36226 }
36227 }
36228 }
36229 }
36230
36231 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, k_gt_8) {
36232 for (size_t k = 9; k < 16; k++) {
36233 GemmMicrokernelTester()
36234 .mr(1)
36235 .nr(4)
36236 .kr(8)
36237 .sr(1)
36238 .m(1)
36239 .n(4)
36240 .k(k)
36241 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36242 }
36243 }
36244
36245 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, k_gt_8_subtile) {
36246 for (size_t k = 9; k < 16; k++) {
36247 for (uint32_t m = 1; m <= 1; m++) {
36248 for (uint32_t n = 1; n <= 4; n++) {
36249 GemmMicrokernelTester()
36250 .mr(1)
36251 .nr(4)
36252 .kr(8)
36253 .sr(1)
36254 .m(m)
36255 .n(n)
36256 .k(k)
36257 .iterations(1)
36258 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36259 }
36260 }
36261 }
36262 }
36263
36264 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, k_div_8) {
36265 for (size_t k = 16; k <= 80; k += 8) {
36266 GemmMicrokernelTester()
36267 .mr(1)
36268 .nr(4)
36269 .kr(8)
36270 .sr(1)
36271 .m(1)
36272 .n(4)
36273 .k(k)
36274 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36275 }
36276 }
36277
36278 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, k_div_8_subtile) {
36279 for (size_t k = 16; k <= 80; k += 8) {
36280 for (uint32_t m = 1; m <= 1; m++) {
36281 for (uint32_t n = 1; n <= 4; n++) {
36282 GemmMicrokernelTester()
36283 .mr(1)
36284 .nr(4)
36285 .kr(8)
36286 .sr(1)
36287 .m(m)
36288 .n(n)
36289 .k(k)
36290 .iterations(1)
36291 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36292 }
36293 }
36294 }
36295 }
36296
36297 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, n_gt_4) {
36298 for (uint32_t n = 5; n < 8; n++) {
36299 for (size_t k = 1; k <= 40; k += 9) {
36300 GemmMicrokernelTester()
36301 .mr(1)
36302 .nr(4)
36303 .kr(8)
36304 .sr(1)
36305 .m(1)
36306 .n(4)
36307 .k(k)
36308 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36309 }
36310 }
36311 }
36312
36313 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, n_gt_4_strided_cn) {
36314 for (uint32_t n = 5; n < 8; n++) {
36315 for (size_t k = 1; k <= 40; k += 9) {
36316 GemmMicrokernelTester()
36317 .mr(1)
36318 .nr(4)
36319 .kr(8)
36320 .sr(1)
36321 .m(1)
36322 .n(4)
36323 .k(k)
36324 .cn_stride(7)
36325 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36326 }
36327 }
36328 }
36329
36330 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, n_gt_4_subtile) {
36331 for (uint32_t n = 5; n < 8; n++) {
36332 for (size_t k = 1; k <= 40; k += 9) {
36333 for (uint32_t m = 1; m <= 1; m++) {
36334 GemmMicrokernelTester()
36335 .mr(1)
36336 .nr(4)
36337 .kr(8)
36338 .sr(1)
36339 .m(m)
36340 .n(n)
36341 .k(k)
36342 .iterations(1)
36343 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36344 }
36345 }
36346 }
36347 }
36348
36349 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, n_div_4) {
36350 for (uint32_t n = 8; n <= 12; n += 4) {
36351 for (size_t k = 1; k <= 40; k += 9) {
36352 GemmMicrokernelTester()
36353 .mr(1)
36354 .nr(4)
36355 .kr(8)
36356 .sr(1)
36357 .m(1)
36358 .n(4)
36359 .k(k)
36360 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36361 }
36362 }
36363 }
36364
36365 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, n_div_4_strided_cn) {
36366 for (uint32_t n = 8; n <= 12; n += 4) {
36367 for (size_t k = 1; k <= 40; k += 9) {
36368 GemmMicrokernelTester()
36369 .mr(1)
36370 .nr(4)
36371 .kr(8)
36372 .sr(1)
36373 .m(1)
36374 .n(n)
36375 .k(k)
36376 .cn_stride(7)
36377 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36378 }
36379 }
36380 }
36381
36382 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, n_div_4_subtile) {
36383 for (uint32_t n = 8; n <= 12; n += 4) {
36384 for (size_t k = 1; k <= 40; k += 9) {
36385 for (uint32_t m = 1; m <= 1; m++) {
36386 GemmMicrokernelTester()
36387 .mr(1)
36388 .nr(4)
36389 .kr(8)
36390 .sr(1)
36391 .m(m)
36392 .n(n)
36393 .k(k)
36394 .iterations(1)
36395 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36396 }
36397 }
36398 }
36399 }
36400
36401 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, small_kernel) {
36402 for (size_t k = 1; k <= 40; k += 9) {
36403 GemmMicrokernelTester()
36404 .mr(1)
36405 .nr(4)
36406 .kr(8)
36407 .sr(1)
36408 .m(1)
36409 .n(4)
36410 .k(k)
36411 .ks(3)
36412 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36413 }
36414 }
36415
36416 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, small_kernel_subtile) {
36417 for (size_t k = 1; k <= 40; k += 9) {
36418 for (uint32_t m = 1; m <= 1; m++) {
36419 for (uint32_t n = 1; n <= 4; n++) {
36420 GemmMicrokernelTester()
36421 .mr(1)
36422 .nr(4)
36423 .kr(8)
36424 .sr(1)
36425 .m(m)
36426 .n(n)
36427 .k(k)
36428 .ks(3)
36429 .iterations(1)
36430 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36431 }
36432 }
36433 }
36434 }
36435
36436 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, n_gt_4_small_kernel) {
36437 for (uint32_t n = 5; n < 8; n++) {
36438 for (size_t k = 1; k <= 40; k += 9) {
36439 GemmMicrokernelTester()
36440 .mr(1)
36441 .nr(4)
36442 .kr(8)
36443 .sr(1)
36444 .m(1)
36445 .n(4)
36446 .k(k)
36447 .ks(3)
36448 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36449 }
36450 }
36451 }
36452
36453 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, n_div_4_small_kernel) {
36454 for (uint32_t n = 8; n <= 12; n += 4) {
36455 for (size_t k = 1; k <= 40; k += 9) {
36456 GemmMicrokernelTester()
36457 .mr(1)
36458 .nr(4)
36459 .kr(8)
36460 .sr(1)
36461 .m(1)
36462 .n(4)
36463 .k(k)
36464 .ks(3)
36465 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36466 }
36467 }
36468 }
36469
36470 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, strided_cm_subtile) {
36471 for (size_t k = 1; k <= 40; k += 9) {
36472 for (uint32_t m = 1; m <= 1; m++) {
36473 for (uint32_t n = 1; n <= 4; n++) {
36474 GemmMicrokernelTester()
36475 .mr(1)
36476 .nr(4)
36477 .kr(8)
36478 .sr(1)
36479 .m(m)
36480 .n(n)
36481 .k(k)
36482 .cm_stride(7)
36483 .iterations(1)
36484 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36485 }
36486 }
36487 }
36488 }
36489
36490 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, a_offset) {
36491 for (size_t k = 1; k <= 40; k += 9) {
36492 GemmMicrokernelTester()
36493 .mr(1)
36494 .nr(4)
36495 .kr(8)
36496 .sr(1)
36497 .m(1)
36498 .n(4)
36499 .k(k)
36500 .ks(3)
36501 .a_offset(43)
36502 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36503 }
36504 }
36505
36506 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, zero) {
36507 for (uint32_t mz = 0; mz < 1; mz++) {
36508 for (size_t k = 1; k <= 40; k += 9) {
36509 GemmMicrokernelTester()
36510 .mr(1)
36511 .nr(4)
36512 .kr(8)
36513 .sr(1)
36514 .m(1)
36515 .n(4)
36516 .k(k)
36517 .ks(3)
36518 .a_offset(43)
36519 .zero_index(mz)
36520 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36521 }
36522 }
36523 }
36524
36525 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, qmin) {
36526 GemmMicrokernelTester()
36527 .mr(1)
36528 .nr(4)
36529 .kr(8)
36530 .sr(1)
36531 .m(1)
36532 .n(4)
36533 .k(8)
36534 .qmin(128)
36535 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36536 }
36537
36538 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, qmax) {
36539 GemmMicrokernelTester()
36540 .mr(1)
36541 .nr(4)
36542 .kr(8)
36543 .sr(1)
36544 .m(1)
36545 .n(4)
36546 .k(8)
36547 .qmax(128)
36548 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36549 }
36550
36551 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, strided_cm) {
36552 GemmMicrokernelTester()
36553 .mr(1)
36554 .nr(4)
36555 .kr(8)
36556 .sr(1)
36557 .m(1)
36558 .n(4)
36559 .k(8)
36560 .cm_stride(7)
36561 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36562 }
36563
36564 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, no_a_zero_point) {
36565 for (size_t k = 1; k <= 40; k += 9) {
36566 GemmMicrokernelTester()
36567 .mr(1)
36568 .nr(4)
36569 .kr(8)
36570 .sr(1)
36571 .m(1)
36572 .n(4)
36573 .k(k)
36574 .a_zero_point(0)
36575 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36576 }
36577 }
36578
36579 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, no_b_zero_point) {
36580 for (size_t k = 1; k <= 40; k += 9) {
36581 GemmMicrokernelTester()
36582 .mr(1)
36583 .nr(4)
36584 .kr(8)
36585 .sr(1)
36586 .m(1)
36587 .n(4)
36588 .k(k)
36589 .b_zero_point(0)
36590 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36591 }
36592 }
36593
36594 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_LD128, no_zero_point) {
36595 for (size_t k = 1; k <= 40; k += 9) {
36596 GemmMicrokernelTester()
36597 .mr(1)
36598 .nr(4)
36599 .kr(8)
36600 .sr(1)
36601 .m(1)
36602 .n(4)
36603 .k(k)
36604 .a_zero_point(0)
36605 .b_zero_point(0)
36606 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36607 }
36608 }
36609#endif // XNN_ARCH_WASMSIMD
36610
36611
36612#if XNN_ARCH_WASMSIMD
36613 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, k_eq_8) {
36614 GemmMicrokernelTester()
36615 .mr(2)
36616 .nr(4)
36617 .kr(8)
36618 .sr(1)
36619 .m(2)
36620 .n(4)
36621 .k(8)
36622 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36623 }
36624
36625 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, strided_cn) {
36626 GemmMicrokernelTester()
36627 .mr(2)
36628 .nr(4)
36629 .kr(8)
36630 .sr(1)
36631 .m(2)
36632 .n(4)
36633 .k(8)
36634 .cn_stride(7)
36635 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36636 }
36637
36638 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, k_eq_8_subtile) {
36639 for (uint32_t m = 1; m <= 2; m++) {
36640 for (uint32_t n = 1; n <= 4; n++) {
36641 GemmMicrokernelTester()
36642 .mr(2)
36643 .nr(4)
36644 .kr(8)
36645 .sr(1)
36646 .m(m)
36647 .n(n)
36648 .k(8)
36649 .iterations(1)
36650 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36651 }
36652 }
36653 }
36654
36655 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, k_eq_8_subtile_m) {
36656 for (uint32_t m = 1; m <= 2; m++) {
36657 GemmMicrokernelTester()
36658 .mr(2)
36659 .nr(4)
36660 .kr(8)
36661 .sr(1)
36662 .m(m)
36663 .n(4)
36664 .k(8)
36665 .iterations(1)
36666 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36667 }
36668 }
36669
36670 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, k_eq_8_subtile_n) {
36671 for (uint32_t n = 1; n <= 4; n++) {
36672 GemmMicrokernelTester()
36673 .mr(2)
36674 .nr(4)
36675 .kr(8)
36676 .sr(1)
36677 .m(2)
36678 .n(n)
36679 .k(8)
36680 .iterations(1)
36681 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36682 }
36683 }
36684
36685 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, k_lt_8) {
36686 for (size_t k = 1; k < 8; k++) {
36687 GemmMicrokernelTester()
36688 .mr(2)
36689 .nr(4)
36690 .kr(8)
36691 .sr(1)
36692 .m(2)
36693 .n(4)
36694 .k(k)
36695 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36696 }
36697 }
36698
36699 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, k_lt_8_subtile) {
36700 for (size_t k = 1; k < 8; k++) {
36701 for (uint32_t m = 1; m <= 2; m++) {
36702 for (uint32_t n = 1; n <= 4; n++) {
36703 GemmMicrokernelTester()
36704 .mr(2)
36705 .nr(4)
36706 .kr(8)
36707 .sr(1)
36708 .m(m)
36709 .n(n)
36710 .k(k)
36711 .iterations(1)
36712 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36713 }
36714 }
36715 }
36716 }
36717
36718 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, k_gt_8) {
36719 for (size_t k = 9; k < 16; k++) {
36720 GemmMicrokernelTester()
36721 .mr(2)
36722 .nr(4)
36723 .kr(8)
36724 .sr(1)
36725 .m(2)
36726 .n(4)
36727 .k(k)
36728 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36729 }
36730 }
36731
36732 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, k_gt_8_subtile) {
36733 for (size_t k = 9; k < 16; k++) {
36734 for (uint32_t m = 1; m <= 2; m++) {
36735 for (uint32_t n = 1; n <= 4; n++) {
36736 GemmMicrokernelTester()
36737 .mr(2)
36738 .nr(4)
36739 .kr(8)
36740 .sr(1)
36741 .m(m)
36742 .n(n)
36743 .k(k)
36744 .iterations(1)
36745 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36746 }
36747 }
36748 }
36749 }
36750
36751 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, k_div_8) {
36752 for (size_t k = 16; k <= 80; k += 8) {
36753 GemmMicrokernelTester()
36754 .mr(2)
36755 .nr(4)
36756 .kr(8)
36757 .sr(1)
36758 .m(2)
36759 .n(4)
36760 .k(k)
36761 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36762 }
36763 }
36764
36765 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, k_div_8_subtile) {
36766 for (size_t k = 16; k <= 80; k += 8) {
36767 for (uint32_t m = 1; m <= 2; m++) {
36768 for (uint32_t n = 1; n <= 4; n++) {
36769 GemmMicrokernelTester()
36770 .mr(2)
36771 .nr(4)
36772 .kr(8)
36773 .sr(1)
36774 .m(m)
36775 .n(n)
36776 .k(k)
36777 .iterations(1)
36778 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36779 }
36780 }
36781 }
36782 }
36783
36784 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, n_gt_4) {
36785 for (uint32_t n = 5; n < 8; n++) {
36786 for (size_t k = 1; k <= 40; k += 9) {
36787 GemmMicrokernelTester()
36788 .mr(2)
36789 .nr(4)
36790 .kr(8)
36791 .sr(1)
36792 .m(2)
36793 .n(4)
36794 .k(k)
36795 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36796 }
36797 }
36798 }
36799
36800 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, n_gt_4_strided_cn) {
36801 for (uint32_t n = 5; n < 8; n++) {
36802 for (size_t k = 1; k <= 40; k += 9) {
36803 GemmMicrokernelTester()
36804 .mr(2)
36805 .nr(4)
36806 .kr(8)
36807 .sr(1)
36808 .m(2)
36809 .n(4)
36810 .k(k)
36811 .cn_stride(7)
36812 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36813 }
36814 }
36815 }
36816
36817 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, n_gt_4_subtile) {
36818 for (uint32_t n = 5; n < 8; n++) {
36819 for (size_t k = 1; k <= 40; k += 9) {
36820 for (uint32_t m = 1; m <= 2; m++) {
36821 GemmMicrokernelTester()
36822 .mr(2)
36823 .nr(4)
36824 .kr(8)
36825 .sr(1)
36826 .m(m)
36827 .n(n)
36828 .k(k)
36829 .iterations(1)
36830 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36831 }
36832 }
36833 }
36834 }
36835
36836 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, n_div_4) {
36837 for (uint32_t n = 8; n <= 12; n += 4) {
36838 for (size_t k = 1; k <= 40; k += 9) {
36839 GemmMicrokernelTester()
36840 .mr(2)
36841 .nr(4)
36842 .kr(8)
36843 .sr(1)
36844 .m(2)
36845 .n(4)
36846 .k(k)
36847 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36848 }
36849 }
36850 }
36851
36852 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, n_div_4_strided_cn) {
36853 for (uint32_t n = 8; n <= 12; n += 4) {
36854 for (size_t k = 1; k <= 40; k += 9) {
36855 GemmMicrokernelTester()
36856 .mr(2)
36857 .nr(4)
36858 .kr(8)
36859 .sr(1)
36860 .m(2)
36861 .n(n)
36862 .k(k)
36863 .cn_stride(7)
36864 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36865 }
36866 }
36867 }
36868
36869 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, n_div_4_subtile) {
36870 for (uint32_t n = 8; n <= 12; n += 4) {
36871 for (size_t k = 1; k <= 40; k += 9) {
36872 for (uint32_t m = 1; m <= 2; m++) {
36873 GemmMicrokernelTester()
36874 .mr(2)
36875 .nr(4)
36876 .kr(8)
36877 .sr(1)
36878 .m(m)
36879 .n(n)
36880 .k(k)
36881 .iterations(1)
36882 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36883 }
36884 }
36885 }
36886 }
36887
36888 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, small_kernel) {
36889 for (size_t k = 1; k <= 40; k += 9) {
36890 GemmMicrokernelTester()
36891 .mr(2)
36892 .nr(4)
36893 .kr(8)
36894 .sr(1)
36895 .m(2)
36896 .n(4)
36897 .k(k)
36898 .ks(3)
36899 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36900 }
36901 }
36902
36903 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, small_kernel_subtile) {
36904 for (size_t k = 1; k <= 40; k += 9) {
36905 for (uint32_t m = 1; m <= 2; m++) {
36906 for (uint32_t n = 1; n <= 4; n++) {
36907 GemmMicrokernelTester()
36908 .mr(2)
36909 .nr(4)
36910 .kr(8)
36911 .sr(1)
36912 .m(m)
36913 .n(n)
36914 .k(k)
36915 .ks(3)
36916 .iterations(1)
36917 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36918 }
36919 }
36920 }
36921 }
36922
36923 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, n_gt_4_small_kernel) {
36924 for (uint32_t n = 5; n < 8; n++) {
36925 for (size_t k = 1; k <= 40; k += 9) {
36926 GemmMicrokernelTester()
36927 .mr(2)
36928 .nr(4)
36929 .kr(8)
36930 .sr(1)
36931 .m(2)
36932 .n(4)
36933 .k(k)
36934 .ks(3)
36935 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36936 }
36937 }
36938 }
36939
36940 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, n_div_4_small_kernel) {
36941 for (uint32_t n = 8; n <= 12; n += 4) {
36942 for (size_t k = 1; k <= 40; k += 9) {
36943 GemmMicrokernelTester()
36944 .mr(2)
36945 .nr(4)
36946 .kr(8)
36947 .sr(1)
36948 .m(2)
36949 .n(4)
36950 .k(k)
36951 .ks(3)
36952 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36953 }
36954 }
36955 }
36956
36957 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, strided_cm_subtile) {
36958 for (size_t k = 1; k <= 40; k += 9) {
36959 for (uint32_t m = 1; m <= 2; m++) {
36960 for (uint32_t n = 1; n <= 4; n++) {
36961 GemmMicrokernelTester()
36962 .mr(2)
36963 .nr(4)
36964 .kr(8)
36965 .sr(1)
36966 .m(m)
36967 .n(n)
36968 .k(k)
36969 .cm_stride(7)
36970 .iterations(1)
36971 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36972 }
36973 }
36974 }
36975 }
36976
36977 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, a_offset) {
36978 for (size_t k = 1; k <= 40; k += 9) {
36979 GemmMicrokernelTester()
36980 .mr(2)
36981 .nr(4)
36982 .kr(8)
36983 .sr(1)
36984 .m(2)
36985 .n(4)
36986 .k(k)
36987 .ks(3)
36988 .a_offset(83)
36989 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
36990 }
36991 }
36992
36993 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, zero) {
36994 for (uint32_t mz = 0; mz < 2; mz++) {
36995 for (size_t k = 1; k <= 40; k += 9) {
36996 GemmMicrokernelTester()
36997 .mr(2)
36998 .nr(4)
36999 .kr(8)
37000 .sr(1)
37001 .m(2)
37002 .n(4)
37003 .k(k)
37004 .ks(3)
37005 .a_offset(83)
37006 .zero_index(mz)
37007 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37008 }
37009 }
37010 }
37011
37012 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, qmin) {
37013 GemmMicrokernelTester()
37014 .mr(2)
37015 .nr(4)
37016 .kr(8)
37017 .sr(1)
37018 .m(2)
37019 .n(4)
37020 .k(8)
37021 .qmin(128)
37022 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37023 }
37024
37025 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, qmax) {
37026 GemmMicrokernelTester()
37027 .mr(2)
37028 .nr(4)
37029 .kr(8)
37030 .sr(1)
37031 .m(2)
37032 .n(4)
37033 .k(8)
37034 .qmax(128)
37035 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37036 }
37037
37038 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, strided_cm) {
37039 GemmMicrokernelTester()
37040 .mr(2)
37041 .nr(4)
37042 .kr(8)
37043 .sr(1)
37044 .m(2)
37045 .n(4)
37046 .k(8)
37047 .cm_stride(7)
37048 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37049 }
37050
37051 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, no_a_zero_point) {
37052 for (size_t k = 1; k <= 40; k += 9) {
37053 GemmMicrokernelTester()
37054 .mr(2)
37055 .nr(4)
37056 .kr(8)
37057 .sr(1)
37058 .m(2)
37059 .n(4)
37060 .k(k)
37061 .a_zero_point(0)
37062 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37063 }
37064 }
37065
37066 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, no_b_zero_point) {
37067 for (size_t k = 1; k <= 40; k += 9) {
37068 GemmMicrokernelTester()
37069 .mr(2)
37070 .nr(4)
37071 .kr(8)
37072 .sr(1)
37073 .m(2)
37074 .n(4)
37075 .k(k)
37076 .b_zero_point(0)
37077 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37078 }
37079 }
37080
37081 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_LD128, no_zero_point) {
37082 for (size_t k = 1; k <= 40; k += 9) {
37083 GemmMicrokernelTester()
37084 .mr(2)
37085 .nr(4)
37086 .kr(8)
37087 .sr(1)
37088 .m(2)
37089 .n(4)
37090 .k(k)
37091 .a_zero_point(0)
37092 .b_zero_point(0)
37093 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37094 }
37095 }
37096#endif // XNN_ARCH_WASMSIMD
37097
37098
37099#if XNN_ARCH_WASMSIMD
37100 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, k_eq_8) {
37101 GemmMicrokernelTester()
37102 .mr(3)
37103 .nr(4)
37104 .kr(8)
37105 .sr(1)
37106 .m(3)
37107 .n(4)
37108 .k(8)
37109 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37110 }
37111
37112 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, strided_cn) {
37113 GemmMicrokernelTester()
37114 .mr(3)
37115 .nr(4)
37116 .kr(8)
37117 .sr(1)
37118 .m(3)
37119 .n(4)
37120 .k(8)
37121 .cn_stride(7)
37122 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37123 }
37124
37125 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, k_eq_8_subtile) {
37126 for (uint32_t m = 1; m <= 3; m++) {
37127 for (uint32_t n = 1; n <= 4; n++) {
37128 GemmMicrokernelTester()
37129 .mr(3)
37130 .nr(4)
37131 .kr(8)
37132 .sr(1)
37133 .m(m)
37134 .n(n)
37135 .k(8)
37136 .iterations(1)
37137 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37138 }
37139 }
37140 }
37141
37142 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, k_eq_8_subtile_m) {
37143 for (uint32_t m = 1; m <= 3; m++) {
37144 GemmMicrokernelTester()
37145 .mr(3)
37146 .nr(4)
37147 .kr(8)
37148 .sr(1)
37149 .m(m)
37150 .n(4)
37151 .k(8)
37152 .iterations(1)
37153 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37154 }
37155 }
37156
37157 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, k_eq_8_subtile_n) {
37158 for (uint32_t n = 1; n <= 4; n++) {
37159 GemmMicrokernelTester()
37160 .mr(3)
37161 .nr(4)
37162 .kr(8)
37163 .sr(1)
37164 .m(3)
37165 .n(n)
37166 .k(8)
37167 .iterations(1)
37168 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37169 }
37170 }
37171
37172 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, k_lt_8) {
37173 for (size_t k = 1; k < 8; k++) {
37174 GemmMicrokernelTester()
37175 .mr(3)
37176 .nr(4)
37177 .kr(8)
37178 .sr(1)
37179 .m(3)
37180 .n(4)
37181 .k(k)
37182 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37183 }
37184 }
37185
37186 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, k_lt_8_subtile) {
37187 for (size_t k = 1; k < 8; k++) {
37188 for (uint32_t m = 1; m <= 3; m++) {
37189 for (uint32_t n = 1; n <= 4; n++) {
37190 GemmMicrokernelTester()
37191 .mr(3)
37192 .nr(4)
37193 .kr(8)
37194 .sr(1)
37195 .m(m)
37196 .n(n)
37197 .k(k)
37198 .iterations(1)
37199 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37200 }
37201 }
37202 }
37203 }
37204
37205 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, k_gt_8) {
37206 for (size_t k = 9; k < 16; k++) {
37207 GemmMicrokernelTester()
37208 .mr(3)
37209 .nr(4)
37210 .kr(8)
37211 .sr(1)
37212 .m(3)
37213 .n(4)
37214 .k(k)
37215 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37216 }
37217 }
37218
37219 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, k_gt_8_subtile) {
37220 for (size_t k = 9; k < 16; k++) {
37221 for (uint32_t m = 1; m <= 3; m++) {
37222 for (uint32_t n = 1; n <= 4; n++) {
37223 GemmMicrokernelTester()
37224 .mr(3)
37225 .nr(4)
37226 .kr(8)
37227 .sr(1)
37228 .m(m)
37229 .n(n)
37230 .k(k)
37231 .iterations(1)
37232 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37233 }
37234 }
37235 }
37236 }
37237
37238 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, k_div_8) {
37239 for (size_t k = 16; k <= 80; k += 8) {
37240 GemmMicrokernelTester()
37241 .mr(3)
37242 .nr(4)
37243 .kr(8)
37244 .sr(1)
37245 .m(3)
37246 .n(4)
37247 .k(k)
37248 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37249 }
37250 }
37251
37252 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, k_div_8_subtile) {
37253 for (size_t k = 16; k <= 80; k += 8) {
37254 for (uint32_t m = 1; m <= 3; m++) {
37255 for (uint32_t n = 1; n <= 4; n++) {
37256 GemmMicrokernelTester()
37257 .mr(3)
37258 .nr(4)
37259 .kr(8)
37260 .sr(1)
37261 .m(m)
37262 .n(n)
37263 .k(k)
37264 .iterations(1)
37265 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37266 }
37267 }
37268 }
37269 }
37270
37271 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, n_gt_4) {
37272 for (uint32_t n = 5; n < 8; n++) {
37273 for (size_t k = 1; k <= 40; k += 9) {
37274 GemmMicrokernelTester()
37275 .mr(3)
37276 .nr(4)
37277 .kr(8)
37278 .sr(1)
37279 .m(3)
37280 .n(4)
37281 .k(k)
37282 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37283 }
37284 }
37285 }
37286
37287 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, n_gt_4_strided_cn) {
37288 for (uint32_t n = 5; n < 8; n++) {
37289 for (size_t k = 1; k <= 40; k += 9) {
37290 GemmMicrokernelTester()
37291 .mr(3)
37292 .nr(4)
37293 .kr(8)
37294 .sr(1)
37295 .m(3)
37296 .n(4)
37297 .k(k)
37298 .cn_stride(7)
37299 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37300 }
37301 }
37302 }
37303
37304 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, n_gt_4_subtile) {
37305 for (uint32_t n = 5; n < 8; n++) {
37306 for (size_t k = 1; k <= 40; k += 9) {
37307 for (uint32_t m = 1; m <= 3; m++) {
37308 GemmMicrokernelTester()
37309 .mr(3)
37310 .nr(4)
37311 .kr(8)
37312 .sr(1)
37313 .m(m)
37314 .n(n)
37315 .k(k)
37316 .iterations(1)
37317 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37318 }
37319 }
37320 }
37321 }
37322
37323 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, n_div_4) {
37324 for (uint32_t n = 8; n <= 12; n += 4) {
37325 for (size_t k = 1; k <= 40; k += 9) {
37326 GemmMicrokernelTester()
37327 .mr(3)
37328 .nr(4)
37329 .kr(8)
37330 .sr(1)
37331 .m(3)
37332 .n(4)
37333 .k(k)
37334 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37335 }
37336 }
37337 }
37338
37339 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, n_div_4_strided_cn) {
37340 for (uint32_t n = 8; n <= 12; n += 4) {
37341 for (size_t k = 1; k <= 40; k += 9) {
37342 GemmMicrokernelTester()
37343 .mr(3)
37344 .nr(4)
37345 .kr(8)
37346 .sr(1)
37347 .m(3)
37348 .n(n)
37349 .k(k)
37350 .cn_stride(7)
37351 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37352 }
37353 }
37354 }
37355
37356 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, n_div_4_subtile) {
37357 for (uint32_t n = 8; n <= 12; n += 4) {
37358 for (size_t k = 1; k <= 40; k += 9) {
37359 for (uint32_t m = 1; m <= 3; m++) {
37360 GemmMicrokernelTester()
37361 .mr(3)
37362 .nr(4)
37363 .kr(8)
37364 .sr(1)
37365 .m(m)
37366 .n(n)
37367 .k(k)
37368 .iterations(1)
37369 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37370 }
37371 }
37372 }
37373 }
37374
37375 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, small_kernel) {
37376 for (size_t k = 1; k <= 40; k += 9) {
37377 GemmMicrokernelTester()
37378 .mr(3)
37379 .nr(4)
37380 .kr(8)
37381 .sr(1)
37382 .m(3)
37383 .n(4)
37384 .k(k)
37385 .ks(3)
37386 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37387 }
37388 }
37389
37390 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, small_kernel_subtile) {
37391 for (size_t k = 1; k <= 40; k += 9) {
37392 for (uint32_t m = 1; m <= 3; m++) {
37393 for (uint32_t n = 1; n <= 4; n++) {
37394 GemmMicrokernelTester()
37395 .mr(3)
37396 .nr(4)
37397 .kr(8)
37398 .sr(1)
37399 .m(m)
37400 .n(n)
37401 .k(k)
37402 .ks(3)
37403 .iterations(1)
37404 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37405 }
37406 }
37407 }
37408 }
37409
37410 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, n_gt_4_small_kernel) {
37411 for (uint32_t n = 5; n < 8; n++) {
37412 for (size_t k = 1; k <= 40; k += 9) {
37413 GemmMicrokernelTester()
37414 .mr(3)
37415 .nr(4)
37416 .kr(8)
37417 .sr(1)
37418 .m(3)
37419 .n(4)
37420 .k(k)
37421 .ks(3)
37422 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37423 }
37424 }
37425 }
37426
37427 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, n_div_4_small_kernel) {
37428 for (uint32_t n = 8; n <= 12; n += 4) {
37429 for (size_t k = 1; k <= 40; k += 9) {
37430 GemmMicrokernelTester()
37431 .mr(3)
37432 .nr(4)
37433 .kr(8)
37434 .sr(1)
37435 .m(3)
37436 .n(4)
37437 .k(k)
37438 .ks(3)
37439 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37440 }
37441 }
37442 }
37443
37444 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, strided_cm_subtile) {
37445 for (size_t k = 1; k <= 40; k += 9) {
37446 for (uint32_t m = 1; m <= 3; m++) {
37447 for (uint32_t n = 1; n <= 4; n++) {
37448 GemmMicrokernelTester()
37449 .mr(3)
37450 .nr(4)
37451 .kr(8)
37452 .sr(1)
37453 .m(m)
37454 .n(n)
37455 .k(k)
37456 .cm_stride(7)
37457 .iterations(1)
37458 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37459 }
37460 }
37461 }
37462 }
37463
37464 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, a_offset) {
37465 for (size_t k = 1; k <= 40; k += 9) {
37466 GemmMicrokernelTester()
37467 .mr(3)
37468 .nr(4)
37469 .kr(8)
37470 .sr(1)
37471 .m(3)
37472 .n(4)
37473 .k(k)
37474 .ks(3)
37475 .a_offset(127)
37476 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37477 }
37478 }
37479
37480 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, zero) {
37481 for (uint32_t mz = 0; mz < 3; mz++) {
37482 for (size_t k = 1; k <= 40; k += 9) {
37483 GemmMicrokernelTester()
37484 .mr(3)
37485 .nr(4)
37486 .kr(8)
37487 .sr(1)
37488 .m(3)
37489 .n(4)
37490 .k(k)
37491 .ks(3)
37492 .a_offset(127)
37493 .zero_index(mz)
37494 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37495 }
37496 }
37497 }
37498
37499 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, qmin) {
37500 GemmMicrokernelTester()
37501 .mr(3)
37502 .nr(4)
37503 .kr(8)
37504 .sr(1)
37505 .m(3)
37506 .n(4)
37507 .k(8)
37508 .qmin(128)
37509 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37510 }
37511
37512 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, qmax) {
37513 GemmMicrokernelTester()
37514 .mr(3)
37515 .nr(4)
37516 .kr(8)
37517 .sr(1)
37518 .m(3)
37519 .n(4)
37520 .k(8)
37521 .qmax(128)
37522 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37523 }
37524
37525 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, strided_cm) {
37526 GemmMicrokernelTester()
37527 .mr(3)
37528 .nr(4)
37529 .kr(8)
37530 .sr(1)
37531 .m(3)
37532 .n(4)
37533 .k(8)
37534 .cm_stride(7)
37535 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37536 }
37537
37538 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, no_a_zero_point) {
37539 for (size_t k = 1; k <= 40; k += 9) {
37540 GemmMicrokernelTester()
37541 .mr(3)
37542 .nr(4)
37543 .kr(8)
37544 .sr(1)
37545 .m(3)
37546 .n(4)
37547 .k(k)
37548 .a_zero_point(0)
37549 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37550 }
37551 }
37552
37553 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, no_b_zero_point) {
37554 for (size_t k = 1; k <= 40; k += 9) {
37555 GemmMicrokernelTester()
37556 .mr(3)
37557 .nr(4)
37558 .kr(8)
37559 .sr(1)
37560 .m(3)
37561 .n(4)
37562 .k(k)
37563 .b_zero_point(0)
37564 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37565 }
37566 }
37567
37568 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_LD128, no_zero_point) {
37569 for (size_t k = 1; k <= 40; k += 9) {
37570 GemmMicrokernelTester()
37571 .mr(3)
37572 .nr(4)
37573 .kr(8)
37574 .sr(1)
37575 .m(3)
37576 .n(4)
37577 .k(k)
37578 .a_zero_point(0)
37579 .b_zero_point(0)
37580 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_init_qu8_requantization_fp32_params, xnn_qu8_requantize_fp32);
37581 }
37582 }
37583#endif // XNN_ARCH_WASMSIMD