// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/qc8-gemm-minmax-fp32.yaml
//   Generator: tools/generate-gemm-test.py


#include <gtest/gtest.h>

#include <xnnpack/common.h>
#include <xnnpack/isa-checks.h>

#include <xnnpack/gemm.h>
#include <xnnpack/igemm.h>
#include <xnnpack/ppmm.h>
#include "gemm-microkernel-tester.h"


25#if XNN_ARCH_X86 || XNN_ARCH_X86_64
26 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8) {
27 TEST_REQUIRES_X86_AVX2;
28 GemmMicrokernelTester()
29 .mr(1)
30 .nr(8)
31 .kr(8)
32 .sr(1)
33 .m(1)
34 .n(8)
35 .k(8)
36 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
37 }
38
39 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, strided_cn) {
40 TEST_REQUIRES_X86_AVX2;
41 GemmMicrokernelTester()
42 .mr(1)
43 .nr(8)
44 .kr(8)
45 .sr(1)
46 .m(1)
47 .n(8)
48 .k(8)
49 .cn_stride(11)
50 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
51 }
52
53 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_strided_a) {
54 TEST_REQUIRES_X86_AVX2;
55 GemmMicrokernelTester()
56 .mr(1)
57 .nr(8)
58 .kr(8)
59 .sr(1)
60 .m(1)
61 .n(8)
62 .k(8)
63 .a_stride(11)
64 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
65 }
66
67 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile) {
68 TEST_REQUIRES_X86_AVX2;
69 for (uint32_t m = 1; m <= 1; m++) {
70 for (uint32_t n = 1; n <= 8; n++) {
71 GemmMicrokernelTester()
72 .mr(1)
73 .nr(8)
74 .kr(8)
75 .sr(1)
76 .m(m)
77 .n(n)
78 .k(8)
79 .iterations(1)
80 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
81 }
82 }
83 }
84
85 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_m) {
86 TEST_REQUIRES_X86_AVX2;
87 for (uint32_t m = 1; m <= 1; m++) {
88 GemmMicrokernelTester()
89 .mr(1)
90 .nr(8)
91 .kr(8)
92 .sr(1)
93 .m(m)
94 .n(8)
95 .k(8)
96 .iterations(1)
97 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
98 }
99 }
100
101 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_n) {
102 TEST_REQUIRES_X86_AVX2;
103 for (uint32_t n = 1; n <= 8; n++) {
104 GemmMicrokernelTester()
105 .mr(1)
106 .nr(8)
107 .kr(8)
108 .sr(1)
109 .m(1)
110 .n(n)
111 .k(8)
112 .iterations(1)
113 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
114 }
115 }
116
117 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8) {
118 TEST_REQUIRES_X86_AVX2;
119 for (size_t k = 1; k < 8; k++) {
120 GemmMicrokernelTester()
121 .mr(1)
122 .nr(8)
123 .kr(8)
124 .sr(1)
125 .m(1)
126 .n(8)
127 .k(k)
128 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
129 }
130 }
131
132 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8_strided_a) {
133 TEST_REQUIRES_X86_AVX2;
134 for (size_t k = 1; k < 8; k++) {
135 GemmMicrokernelTester()
136 .mr(1)
137 .nr(8)
138 .kr(8)
139 .sr(1)
140 .m(1)
141 .n(8)
142 .k(k)
143 .a_stride(11)
144 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
145 }
146 }
147
148 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8_subtile) {
149 TEST_REQUIRES_X86_AVX2;
150 for (size_t k = 1; k < 8; k++) {
151 for (uint32_t m = 1; m <= 1; m++) {
152 for (uint32_t n = 1; n <= 8; n++) {
153 GemmMicrokernelTester()
154 .mr(1)
155 .nr(8)
156 .kr(8)
157 .sr(1)
158 .m(m)
159 .n(n)
160 .k(k)
161 .iterations(1)
162 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
163 }
164 }
165 }
166 }
167
168 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8) {
169 TEST_REQUIRES_X86_AVX2;
170 for (size_t k = 9; k < 16; k++) {
171 GemmMicrokernelTester()
172 .mr(1)
173 .nr(8)
174 .kr(8)
175 .sr(1)
176 .m(1)
177 .n(8)
178 .k(k)
179 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
180 }
181 }
182
183 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8_strided_a) {
184 TEST_REQUIRES_X86_AVX2;
185 for (size_t k = 9; k < 16; k++) {
186 GemmMicrokernelTester()
187 .mr(1)
188 .nr(8)
189 .kr(8)
190 .sr(1)
191 .m(1)
192 .n(8)
193 .k(k)
194 .a_stride(19)
195 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
196 }
197 }
198
199 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8_subtile) {
200 TEST_REQUIRES_X86_AVX2;
201 for (size_t k = 9; k < 16; k++) {
202 for (uint32_t m = 1; m <= 1; m++) {
203 for (uint32_t n = 1; n <= 8; n++) {
204 GemmMicrokernelTester()
205 .mr(1)
206 .nr(8)
207 .kr(8)
208 .sr(1)
209 .m(m)
210 .n(n)
211 .k(k)
212 .iterations(1)
213 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
214 }
215 }
216 }
217 }
218
219 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8) {
220 TEST_REQUIRES_X86_AVX2;
221 for (size_t k = 16; k <= 80; k += 8) {
222 GemmMicrokernelTester()
223 .mr(1)
224 .nr(8)
225 .kr(8)
226 .sr(1)
227 .m(1)
228 .n(8)
229 .k(k)
230 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
231 }
232 }
233
234 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8_strided_a) {
235 TEST_REQUIRES_X86_AVX2;
236 for (size_t k = 16; k <= 80; k += 8) {
237 GemmMicrokernelTester()
238 .mr(1)
239 .nr(8)
240 .kr(8)
241 .sr(1)
242 .m(1)
243 .n(8)
244 .k(k)
245 .a_stride(83)
246 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
247 }
248 }
249
250 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8_subtile) {
251 TEST_REQUIRES_X86_AVX2;
252 for (size_t k = 16; k <= 80; k += 8) {
253 for (uint32_t m = 1; m <= 1; m++) {
254 for (uint32_t n = 1; n <= 8; n++) {
255 GemmMicrokernelTester()
256 .mr(1)
257 .nr(8)
258 .kr(8)
259 .sr(1)
260 .m(m)
261 .n(n)
262 .k(k)
263 .iterations(1)
264 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
265 }
266 }
267 }
268 }
269
270 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8) {
271 TEST_REQUIRES_X86_AVX2;
272 for (uint32_t n = 9; n < 16; n++) {
273 for (size_t k = 1; k <= 40; k += 9) {
274 GemmMicrokernelTester()
275 .mr(1)
276 .nr(8)
277 .kr(8)
278 .sr(1)
279 .m(1)
280 .n(8)
281 .k(k)
282 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
283 }
284 }
285 }
286
287 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_strided_cn) {
288 TEST_REQUIRES_X86_AVX2;
289 for (uint32_t n = 9; n < 16; n++) {
290 for (size_t k = 1; k <= 40; k += 9) {
291 GemmMicrokernelTester()
292 .mr(1)
293 .nr(8)
294 .kr(8)
295 .sr(1)
296 .m(1)
297 .n(8)
298 .k(k)
299 .cn_stride(11)
300 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
301 }
302 }
303 }
304
305 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_strided_a) {
306 TEST_REQUIRES_X86_AVX2;
307 for (uint32_t n = 9; n < 16; n++) {
308 for (size_t k = 1; k <= 40; k += 9) {
309 GemmMicrokernelTester()
310 .mr(1)
311 .nr(8)
312 .kr(8)
313 .sr(1)
314 .m(1)
315 .n(n)
316 .k(k)
317 .a_stride(43)
318 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
319 }
320 }
321 }
322
323 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_subtile) {
324 TEST_REQUIRES_X86_AVX2;
325 for (uint32_t n = 9; n < 16; n++) {
326 for (size_t k = 1; k <= 40; k += 9) {
327 for (uint32_t m = 1; m <= 1; m++) {
328 GemmMicrokernelTester()
329 .mr(1)
330 .nr(8)
331 .kr(8)
332 .sr(1)
333 .m(m)
334 .n(n)
335 .k(k)
336 .iterations(1)
337 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
338 }
339 }
340 }
341 }
342
343 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8) {
344 TEST_REQUIRES_X86_AVX2;
345 for (uint32_t n = 16; n <= 24; n += 8) {
346 for (size_t k = 1; k <= 40; k += 9) {
347 GemmMicrokernelTester()
348 .mr(1)
349 .nr(8)
350 .kr(8)
351 .sr(1)
352 .m(1)
353 .n(8)
354 .k(k)
355 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
356 }
357 }
358 }
359
360 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_strided_cn) {
361 TEST_REQUIRES_X86_AVX2;
362 for (uint32_t n = 16; n <= 24; n += 8) {
363 for (size_t k = 1; k <= 40; k += 9) {
364 GemmMicrokernelTester()
365 .mr(1)
366 .nr(8)
367 .kr(8)
368 .sr(1)
369 .m(1)
370 .n(n)
371 .k(k)
372 .cn_stride(11)
373 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
374 }
375 }
376 }
377
378 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_strided_a) {
379 TEST_REQUIRES_X86_AVX2;
380 for (uint32_t n = 16; n <= 24; n += 8) {
381 for (size_t k = 1; k <= 40; k += 9) {
382 GemmMicrokernelTester()
383 .mr(1)
384 .nr(8)
385 .kr(8)
386 .sr(1)
387 .m(1)
388 .n(n)
389 .k(k)
390 .a_stride(43)
391 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
392 }
393 }
394 }
395
396 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_subtile) {
397 TEST_REQUIRES_X86_AVX2;
398 for (uint32_t n = 16; n <= 24; n += 8) {
399 for (size_t k = 1; k <= 40; k += 9) {
400 for (uint32_t m = 1; m <= 1; m++) {
401 GemmMicrokernelTester()
402 .mr(1)
403 .nr(8)
404 .kr(8)
405 .sr(1)
406 .m(m)
407 .n(n)
408 .k(k)
409 .iterations(1)
410 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
411 }
412 }
413 }
414 }
415
416 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, strided_cm_subtile) {
417 TEST_REQUIRES_X86_AVX2;
418 for (size_t k = 1; k <= 40; k += 9) {
419 for (uint32_t m = 1; m <= 1; m++) {
420 for (uint32_t n = 1; n <= 8; n++) {
421 GemmMicrokernelTester()
422 .mr(1)
423 .nr(8)
424 .kr(8)
425 .sr(1)
426 .m(m)
427 .n(n)
428 .k(k)
429 .cm_stride(11)
430 .iterations(1)
431 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
432 }
433 }
434 }
435 }
436
437 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, qmin) {
438 TEST_REQUIRES_X86_AVX2;
439 GemmMicrokernelTester()
440 .mr(1)
441 .nr(8)
442 .kr(8)
443 .sr(1)
444 .m(1)
445 .n(8)
446 .k(8)
447 .qmin(128)
448 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
449 }
450
451 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, qmax) {
452 TEST_REQUIRES_X86_AVX2;
453 GemmMicrokernelTester()
454 .mr(1)
455 .nr(8)
456 .kr(8)
457 .sr(1)
458 .m(1)
459 .n(8)
460 .k(8)
461 .qmax(128)
462 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
463 }
464
465 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, strided_cm) {
466 TEST_REQUIRES_X86_AVX2;
467 GemmMicrokernelTester()
468 .mr(1)
469 .nr(8)
470 .kr(8)
471 .sr(1)
472 .m(1)
473 .n(8)
474 .k(8)
475 .cm_stride(11)
476 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
477 }
478#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
479
480
481#if XNN_ARCH_X86 || XNN_ARCH_X86_64
482 TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8) {
483 TEST_REQUIRES_X86_AVX2;
484 GemmMicrokernelTester()
485 .mr(2)
486 .nr(8)
487 .kr(8)
488 .sr(1)
489 .m(2)
490 .n(8)
491 .k(8)
492 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
493 }
494
495 TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, strided_cn) {
496 TEST_REQUIRES_X86_AVX2;
497 GemmMicrokernelTester()
498 .mr(2)
499 .nr(8)
500 .kr(8)
501 .sr(1)
502 .m(2)
503 .n(8)
504 .k(8)
505 .cn_stride(11)
506 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
507 }
508
509 TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_strided_a) {
510 TEST_REQUIRES_X86_AVX2;
511 GemmMicrokernelTester()
512 .mr(2)
513 .nr(8)
514 .kr(8)
515 .sr(1)
516 .m(2)
517 .n(8)
518 .k(8)
519 .a_stride(11)
520 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
521 }
522
523 TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile) {
524 TEST_REQUIRES_X86_AVX2;
525 for (uint32_t m = 1; m <= 2; m++) {
526 for (uint32_t n = 1; n <= 8; n++) {
527 GemmMicrokernelTester()
528 .mr(2)
529 .nr(8)
530 .kr(8)
531 .sr(1)
532 .m(m)
533 .n(n)
534 .k(8)
535 .iterations(1)
536 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
537 }
538 }
539 }
540
541 TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_m) {
542 TEST_REQUIRES_X86_AVX2;
543 for (uint32_t m = 1; m <= 2; m++) {
544 GemmMicrokernelTester()
545 .mr(2)
546 .nr(8)
547 .kr(8)
548 .sr(1)
549 .m(m)
550 .n(8)
551 .k(8)
552 .iterations(1)
553 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
554 }
555 }
556
557 TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_n) {
558 TEST_REQUIRES_X86_AVX2;
559 for (uint32_t n = 1; n <= 8; n++) {
560 GemmMicrokernelTester()
561 .mr(2)
562 .nr(8)
563 .kr(8)
564 .sr(1)
565 .m(2)
566 .n(n)
567 .k(8)
568 .iterations(1)
569 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
570 }
571 }
572
573 TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8) {
574 TEST_REQUIRES_X86_AVX2;
575 for (size_t k = 1; k < 8; k++) {
576 GemmMicrokernelTester()
577 .mr(2)
578 .nr(8)
579 .kr(8)
580 .sr(1)
581 .m(2)
582 .n(8)
583 .k(k)
584 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
585 }
586 }
587
588 TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8_strided_a) {
589 TEST_REQUIRES_X86_AVX2;
590 for (size_t k = 1; k < 8; k++) {
591 GemmMicrokernelTester()
592 .mr(2)
593 .nr(8)
594 .kr(8)
595 .sr(1)
596 .m(2)
597 .n(8)
598 .k(k)
599 .a_stride(11)
600 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
601 }
602 }
603
604 TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8_subtile) {
605 TEST_REQUIRES_X86_AVX2;
606 for (size_t k = 1; k < 8; k++) {
607 for (uint32_t m = 1; m <= 2; m++) {
608 for (uint32_t n = 1; n <= 8; n++) {
609 GemmMicrokernelTester()
610 .mr(2)
611 .nr(8)
612 .kr(8)
613 .sr(1)
614 .m(m)
615 .n(n)
616 .k(k)
617 .iterations(1)
618 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
619 }
620 }
621 }
622 }
623
624 TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8) {
625 TEST_REQUIRES_X86_AVX2;
626 for (size_t k = 9; k < 16; k++) {
627 GemmMicrokernelTester()
628 .mr(2)
629 .nr(8)
630 .kr(8)
631 .sr(1)
632 .m(2)
633 .n(8)
634 .k(k)
635 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
636 }
637 }
638
639 TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8_strided_a) {
640 TEST_REQUIRES_X86_AVX2;
641 for (size_t k = 9; k < 16; k++) {
642 GemmMicrokernelTester()
643 .mr(2)
644 .nr(8)
645 .kr(8)
646 .sr(1)
647 .m(2)
648 .n(8)
649 .k(k)
650 .a_stride(19)
651 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
652 }
653 }
654
655 TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8_subtile) {
656 TEST_REQUIRES_X86_AVX2;
657 for (size_t k = 9; k < 16; k++) {
658 for (uint32_t m = 1; m <= 2; m++) {
659 for (uint32_t n = 1; n <= 8; n++) {
660 GemmMicrokernelTester()
661 .mr(2)
662 .nr(8)
663 .kr(8)
664 .sr(1)
665 .m(m)
666 .n(n)
667 .k(k)
668 .iterations(1)
669 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
670 }
671 }
672 }
673 }
674
675 TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8) {
676 TEST_REQUIRES_X86_AVX2;
677 for (size_t k = 16; k <= 80; k += 8) {
678 GemmMicrokernelTester()
679 .mr(2)
680 .nr(8)
681 .kr(8)
682 .sr(1)
683 .m(2)
684 .n(8)
685 .k(k)
686 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
687 }
688 }
689
690 TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8_strided_a) {
691 TEST_REQUIRES_X86_AVX2;
692 for (size_t k = 16; k <= 80; k += 8) {
693 GemmMicrokernelTester()
694 .mr(2)
695 .nr(8)
696 .kr(8)
697 .sr(1)
698 .m(2)
699 .n(8)
700 .k(k)
701 .a_stride(83)
702 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
703 }
704 }
705
706 TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8_subtile) {
707 TEST_REQUIRES_X86_AVX2;
708 for (size_t k = 16; k <= 80; k += 8) {
709 for (uint32_t m = 1; m <= 2; m++) {
710 for (uint32_t n = 1; n <= 8; n++) {
711 GemmMicrokernelTester()
712 .mr(2)
713 .nr(8)
714 .kr(8)
715 .sr(1)
716 .m(m)
717 .n(n)
718 .k(k)
719 .iterations(1)
720 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
721 }
722 }
723 }
724 }
725
726 TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8) {
727 TEST_REQUIRES_X86_AVX2;
728 for (uint32_t n = 9; n < 16; n++) {
729 for (size_t k = 1; k <= 40; k += 9) {
730 GemmMicrokernelTester()
731 .mr(2)
732 .nr(8)
733 .kr(8)
734 .sr(1)
735 .m(2)
736 .n(8)
737 .k(k)
738 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
739 }
740 }
741 }
742
743 TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_strided_cn) {
744 TEST_REQUIRES_X86_AVX2;
745 for (uint32_t n = 9; n < 16; n++) {
746 for (size_t k = 1; k <= 40; k += 9) {
747 GemmMicrokernelTester()
748 .mr(2)
749 .nr(8)
750 .kr(8)
751 .sr(1)
752 .m(2)
753 .n(8)
754 .k(k)
755 .cn_stride(11)
756 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
757 }
758 }
759 }
760
761 TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_strided_a) {
762 TEST_REQUIRES_X86_AVX2;
763 for (uint32_t n = 9; n < 16; n++) {
764 for (size_t k = 1; k <= 40; k += 9) {
765 GemmMicrokernelTester()
766 .mr(2)
767 .nr(8)
768 .kr(8)
769 .sr(1)
770 .m(2)
771 .n(n)
772 .k(k)
773 .a_stride(43)
774 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
775 }
776 }
777 }
778
779 TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_subtile) {
780 TEST_REQUIRES_X86_AVX2;
781 for (uint32_t n = 9; n < 16; n++) {
782 for (size_t k = 1; k <= 40; k += 9) {
783 for (uint32_t m = 1; m <= 2; m++) {
784 GemmMicrokernelTester()
785 .mr(2)
786 .nr(8)
787 .kr(8)
788 .sr(1)
789 .m(m)
790 .n(n)
791 .k(k)
792 .iterations(1)
793 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
794 }
795 }
796 }
797 }
798
799 TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8) {
800 TEST_REQUIRES_X86_AVX2;
801 for (uint32_t n = 16; n <= 24; n += 8) {
802 for (size_t k = 1; k <= 40; k += 9) {
803 GemmMicrokernelTester()
804 .mr(2)
805 .nr(8)
806 .kr(8)
807 .sr(1)
808 .m(2)
809 .n(8)
810 .k(k)
811 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
812 }
813 }
814 }
815
816 TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_strided_cn) {
817 TEST_REQUIRES_X86_AVX2;
818 for (uint32_t n = 16; n <= 24; n += 8) {
819 for (size_t k = 1; k <= 40; k += 9) {
820 GemmMicrokernelTester()
821 .mr(2)
822 .nr(8)
823 .kr(8)
824 .sr(1)
825 .m(2)
826 .n(n)
827 .k(k)
828 .cn_stride(11)
829 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
830 }
831 }
832 }
833
834 TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_strided_a) {
835 TEST_REQUIRES_X86_AVX2;
836 for (uint32_t n = 16; n <= 24; n += 8) {
837 for (size_t k = 1; k <= 40; k += 9) {
838 GemmMicrokernelTester()
839 .mr(2)
840 .nr(8)
841 .kr(8)
842 .sr(1)
843 .m(2)
844 .n(n)
845 .k(k)
846 .a_stride(43)
847 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
848 }
849 }
850 }
851
852 TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_subtile) {
853 TEST_REQUIRES_X86_AVX2;
854 for (uint32_t n = 16; n <= 24; n += 8) {
855 for (size_t k = 1; k <= 40; k += 9) {
856 for (uint32_t m = 1; m <= 2; m++) {
857 GemmMicrokernelTester()
858 .mr(2)
859 .nr(8)
860 .kr(8)
861 .sr(1)
862 .m(m)
863 .n(n)
864 .k(k)
865 .iterations(1)
866 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
867 }
868 }
869 }
870 }
871
872 TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, strided_cm_subtile) {
873 TEST_REQUIRES_X86_AVX2;
874 for (size_t k = 1; k <= 40; k += 9) {
875 for (uint32_t m = 1; m <= 2; m++) {
876 for (uint32_t n = 1; n <= 8; n++) {
877 GemmMicrokernelTester()
878 .mr(2)
879 .nr(8)
880 .kr(8)
881 .sr(1)
882 .m(m)
883 .n(n)
884 .k(k)
885 .cm_stride(11)
886 .iterations(1)
887 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
888 }
889 }
890 }
891 }
892
893 TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, qmin) {
894 TEST_REQUIRES_X86_AVX2;
895 GemmMicrokernelTester()
896 .mr(2)
897 .nr(8)
898 .kr(8)
899 .sr(1)
900 .m(2)
901 .n(8)
902 .k(8)
903 .qmin(128)
904 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
905 }
906
907 TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, qmax) {
908 TEST_REQUIRES_X86_AVX2;
909 GemmMicrokernelTester()
910 .mr(2)
911 .nr(8)
912 .kr(8)
913 .sr(1)
914 .m(2)
915 .n(8)
916 .k(8)
917 .qmax(128)
918 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
919 }
920
921 TEST(QC8_GEMM_MINMAX_FP32_2X8C8__AVX2, strided_cm) {
922 TEST_REQUIRES_X86_AVX2;
923 GemmMicrokernelTester()
924 .mr(2)
925 .nr(8)
926 .kr(8)
927 .sr(1)
928 .m(2)
929 .n(8)
930 .k(8)
931 .cm_stride(11)
932 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
933 }
934#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
935
936
937#if XNN_ARCH_X86 || XNN_ARCH_X86_64
938 TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8) {
939 TEST_REQUIRES_X86_AVX2;
940 GemmMicrokernelTester()
941 .mr(3)
942 .nr(8)
943 .kr(8)
944 .sr(1)
945 .m(3)
946 .n(8)
947 .k(8)
948 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
949 }
950
951 TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, strided_cn) {
952 TEST_REQUIRES_X86_AVX2;
953 GemmMicrokernelTester()
954 .mr(3)
955 .nr(8)
956 .kr(8)
957 .sr(1)
958 .m(3)
959 .n(8)
960 .k(8)
961 .cn_stride(11)
962 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
963 }
964
965 TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_strided_a) {
966 TEST_REQUIRES_X86_AVX2;
967 GemmMicrokernelTester()
968 .mr(3)
969 .nr(8)
970 .kr(8)
971 .sr(1)
972 .m(3)
973 .n(8)
974 .k(8)
975 .a_stride(11)
976 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
977 }
978
979 TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile) {
980 TEST_REQUIRES_X86_AVX2;
981 for (uint32_t m = 1; m <= 3; m++) {
982 for (uint32_t n = 1; n <= 8; n++) {
983 GemmMicrokernelTester()
984 .mr(3)
985 .nr(8)
986 .kr(8)
987 .sr(1)
988 .m(m)
989 .n(n)
990 .k(8)
991 .iterations(1)
992 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
993 }
994 }
995 }
996
997 TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_m) {
998 TEST_REQUIRES_X86_AVX2;
999 for (uint32_t m = 1; m <= 3; m++) {
1000 GemmMicrokernelTester()
1001 .mr(3)
1002 .nr(8)
1003 .kr(8)
1004 .sr(1)
1005 .m(m)
1006 .n(8)
1007 .k(8)
1008 .iterations(1)
1009 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1010 }
1011 }
1012
1013 TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_n) {
1014 TEST_REQUIRES_X86_AVX2;
1015 for (uint32_t n = 1; n <= 8; n++) {
1016 GemmMicrokernelTester()
1017 .mr(3)
1018 .nr(8)
1019 .kr(8)
1020 .sr(1)
1021 .m(3)
1022 .n(n)
1023 .k(8)
1024 .iterations(1)
1025 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1026 }
1027 }
1028
1029 TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8) {
1030 TEST_REQUIRES_X86_AVX2;
1031 for (size_t k = 1; k < 8; k++) {
1032 GemmMicrokernelTester()
1033 .mr(3)
1034 .nr(8)
1035 .kr(8)
1036 .sr(1)
1037 .m(3)
1038 .n(8)
1039 .k(k)
1040 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1041 }
1042 }
1043
1044 TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8_strided_a) {
1045 TEST_REQUIRES_X86_AVX2;
1046 for (size_t k = 1; k < 8; k++) {
1047 GemmMicrokernelTester()
1048 .mr(3)
1049 .nr(8)
1050 .kr(8)
1051 .sr(1)
1052 .m(3)
1053 .n(8)
1054 .k(k)
1055 .a_stride(11)
1056 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1057 }
1058 }
1059
1060 TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8_subtile) {
1061 TEST_REQUIRES_X86_AVX2;
1062 for (size_t k = 1; k < 8; k++) {
1063 for (uint32_t m = 1; m <= 3; m++) {
1064 for (uint32_t n = 1; n <= 8; n++) {
1065 GemmMicrokernelTester()
1066 .mr(3)
1067 .nr(8)
1068 .kr(8)
1069 .sr(1)
1070 .m(m)
1071 .n(n)
1072 .k(k)
1073 .iterations(1)
1074 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1075 }
1076 }
1077 }
1078 }
1079
1080 TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8) {
1081 TEST_REQUIRES_X86_AVX2;
1082 for (size_t k = 9; k < 16; k++) {
1083 GemmMicrokernelTester()
1084 .mr(3)
1085 .nr(8)
1086 .kr(8)
1087 .sr(1)
1088 .m(3)
1089 .n(8)
1090 .k(k)
1091 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1092 }
1093 }
1094
1095 TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8_strided_a) {
1096 TEST_REQUIRES_X86_AVX2;
1097 for (size_t k = 9; k < 16; k++) {
1098 GemmMicrokernelTester()
1099 .mr(3)
1100 .nr(8)
1101 .kr(8)
1102 .sr(1)
1103 .m(3)
1104 .n(8)
1105 .k(k)
1106 .a_stride(19)
1107 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1108 }
1109 }
1110
1111 TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8_subtile) {
1112 TEST_REQUIRES_X86_AVX2;
1113 for (size_t k = 9; k < 16; k++) {
1114 for (uint32_t m = 1; m <= 3; m++) {
1115 for (uint32_t n = 1; n <= 8; n++) {
1116 GemmMicrokernelTester()
1117 .mr(3)
1118 .nr(8)
1119 .kr(8)
1120 .sr(1)
1121 .m(m)
1122 .n(n)
1123 .k(k)
1124 .iterations(1)
1125 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1126 }
1127 }
1128 }
1129 }
1130
1131 TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8) {
1132 TEST_REQUIRES_X86_AVX2;
1133 for (size_t k = 16; k <= 80; k += 8) {
1134 GemmMicrokernelTester()
1135 .mr(3)
1136 .nr(8)
1137 .kr(8)
1138 .sr(1)
1139 .m(3)
1140 .n(8)
1141 .k(k)
1142 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1143 }
1144 }
1145
1146 TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8_strided_a) {
1147 TEST_REQUIRES_X86_AVX2;
1148 for (size_t k = 16; k <= 80; k += 8) {
1149 GemmMicrokernelTester()
1150 .mr(3)
1151 .nr(8)
1152 .kr(8)
1153 .sr(1)
1154 .m(3)
1155 .n(8)
1156 .k(k)
1157 .a_stride(83)
1158 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1159 }
1160 }
1161
1162 TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8_subtile) {
1163 TEST_REQUIRES_X86_AVX2;
1164 for (size_t k = 16; k <= 80; k += 8) {
1165 for (uint32_t m = 1; m <= 3; m++) {
1166 for (uint32_t n = 1; n <= 8; n++) {
1167 GemmMicrokernelTester()
1168 .mr(3)
1169 .nr(8)
1170 .kr(8)
1171 .sr(1)
1172 .m(m)
1173 .n(n)
1174 .k(k)
1175 .iterations(1)
1176 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1177 }
1178 }
1179 }
1180 }
1181
1182 TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8) {
1183 TEST_REQUIRES_X86_AVX2;
1184 for (uint32_t n = 9; n < 16; n++) {
1185 for (size_t k = 1; k <= 40; k += 9) {
1186 GemmMicrokernelTester()
1187 .mr(3)
1188 .nr(8)
1189 .kr(8)
1190 .sr(1)
1191 .m(3)
1192 .n(8)
1193 .k(k)
1194 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1195 }
1196 }
1197 }
1198
1199 TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_strided_cn) {
1200 TEST_REQUIRES_X86_AVX2;
1201 for (uint32_t n = 9; n < 16; n++) {
1202 for (size_t k = 1; k <= 40; k += 9) {
1203 GemmMicrokernelTester()
1204 .mr(3)
1205 .nr(8)
1206 .kr(8)
1207 .sr(1)
1208 .m(3)
1209 .n(8)
1210 .k(k)
1211 .cn_stride(11)
1212 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1213 }
1214 }
1215 }
1216
1217 TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_strided_a) {
1218 TEST_REQUIRES_X86_AVX2;
1219 for (uint32_t n = 9; n < 16; n++) {
1220 for (size_t k = 1; k <= 40; k += 9) {
1221 GemmMicrokernelTester()
1222 .mr(3)
1223 .nr(8)
1224 .kr(8)
1225 .sr(1)
1226 .m(3)
1227 .n(n)
1228 .k(k)
1229 .a_stride(43)
1230 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1231 }
1232 }
1233 }
1234
1235 TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_subtile) {
1236 TEST_REQUIRES_X86_AVX2;
1237 for (uint32_t n = 9; n < 16; n++) {
1238 for (size_t k = 1; k <= 40; k += 9) {
1239 for (uint32_t m = 1; m <= 3; m++) {
1240 GemmMicrokernelTester()
1241 .mr(3)
1242 .nr(8)
1243 .kr(8)
1244 .sr(1)
1245 .m(m)
1246 .n(n)
1247 .k(k)
1248 .iterations(1)
1249 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1250 }
1251 }
1252 }
1253 }
1254
1255 TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8) {
1256 TEST_REQUIRES_X86_AVX2;
1257 for (uint32_t n = 16; n <= 24; n += 8) {
1258 for (size_t k = 1; k <= 40; k += 9) {
1259 GemmMicrokernelTester()
1260 .mr(3)
1261 .nr(8)
1262 .kr(8)
1263 .sr(1)
1264 .m(3)
1265 .n(8)
1266 .k(k)
1267 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1268 }
1269 }
1270 }
1271
1272 TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_strided_cn) {
1273 TEST_REQUIRES_X86_AVX2;
1274 for (uint32_t n = 16; n <= 24; n += 8) {
1275 for (size_t k = 1; k <= 40; k += 9) {
1276 GemmMicrokernelTester()
1277 .mr(3)
1278 .nr(8)
1279 .kr(8)
1280 .sr(1)
1281 .m(3)
1282 .n(n)
1283 .k(k)
1284 .cn_stride(11)
1285 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1286 }
1287 }
1288 }
1289
1290 TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_strided_a) {
1291 TEST_REQUIRES_X86_AVX2;
1292 for (uint32_t n = 16; n <= 24; n += 8) {
1293 for (size_t k = 1; k <= 40; k += 9) {
1294 GemmMicrokernelTester()
1295 .mr(3)
1296 .nr(8)
1297 .kr(8)
1298 .sr(1)
1299 .m(3)
1300 .n(n)
1301 .k(k)
1302 .a_stride(43)
1303 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1304 }
1305 }
1306 }
1307
1308 TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_subtile) {
1309 TEST_REQUIRES_X86_AVX2;
1310 for (uint32_t n = 16; n <= 24; n += 8) {
1311 for (size_t k = 1; k <= 40; k += 9) {
1312 for (uint32_t m = 1; m <= 3; m++) {
1313 GemmMicrokernelTester()
1314 .mr(3)
1315 .nr(8)
1316 .kr(8)
1317 .sr(1)
1318 .m(m)
1319 .n(n)
1320 .k(k)
1321 .iterations(1)
1322 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1323 }
1324 }
1325 }
1326 }
1327
1328 TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, strided_cm_subtile) {
1329 TEST_REQUIRES_X86_AVX2;
1330 for (size_t k = 1; k <= 40; k += 9) {
1331 for (uint32_t m = 1; m <= 3; m++) {
1332 for (uint32_t n = 1; n <= 8; n++) {
1333 GemmMicrokernelTester()
1334 .mr(3)
1335 .nr(8)
1336 .kr(8)
1337 .sr(1)
1338 .m(m)
1339 .n(n)
1340 .k(k)
1341 .cm_stride(11)
1342 .iterations(1)
1343 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1344 }
1345 }
1346 }
1347 }
1348
1349 TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, qmin) {
1350 TEST_REQUIRES_X86_AVX2;
1351 GemmMicrokernelTester()
1352 .mr(3)
1353 .nr(8)
1354 .kr(8)
1355 .sr(1)
1356 .m(3)
1357 .n(8)
1358 .k(8)
1359 .qmin(128)
1360 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1361 }
1362
1363 TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, qmax) {
1364 TEST_REQUIRES_X86_AVX2;
1365 GemmMicrokernelTester()
1366 .mr(3)
1367 .nr(8)
1368 .kr(8)
1369 .sr(1)
1370 .m(3)
1371 .n(8)
1372 .k(8)
1373 .qmax(128)
1374 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1375 }
1376
1377 TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, strided_cm) {
1378 TEST_REQUIRES_X86_AVX2;
1379 GemmMicrokernelTester()
1380 .mr(3)
1381 .nr(8)
1382 .kr(8)
1383 .sr(1)
1384 .m(3)
1385 .n(8)
1386 .k(8)
1387 .cm_stride(11)
1388 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1389 }
1390#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1391
1392
1393#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1394 TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_eq_8) {
1395 TEST_REQUIRES_X86_AVX2;
1396 GemmMicrokernelTester()
1397 .extended_weights(true)
1398 .mr(1)
1399 .nr(8)
1400 .kr(8)
1401 .sr(1)
1402 .m(1)
1403 .n(8)
1404 .k(8)
1405 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1406 }
1407
1408 TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, strided_cn) {
1409 TEST_REQUIRES_X86_AVX2;
1410 GemmMicrokernelTester()
1411 .extended_weights(true)
1412 .mr(1)
1413 .nr(8)
1414 .kr(8)
1415 .sr(1)
1416 .m(1)
1417 .n(8)
1418 .k(8)
1419 .cn_stride(11)
1420 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1421 }
1422
1423 TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_eq_8_strided_a) {
1424 TEST_REQUIRES_X86_AVX2;
1425 GemmMicrokernelTester()
1426 .extended_weights(true)
1427 .mr(1)
1428 .nr(8)
1429 .kr(8)
1430 .sr(1)
1431 .m(1)
1432 .n(8)
1433 .k(8)
1434 .a_stride(11)
1435 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1436 }
1437
1438 TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile) {
1439 TEST_REQUIRES_X86_AVX2;
1440 for (uint32_t m = 1; m <= 1; m++) {
1441 for (uint32_t n = 1; n <= 8; n++) {
1442 GemmMicrokernelTester()
1443 .extended_weights(true)
1444 .mr(1)
1445 .nr(8)
1446 .kr(8)
1447 .sr(1)
1448 .m(m)
1449 .n(n)
1450 .k(8)
1451 .iterations(1)
1452 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1453 }
1454 }
1455 }
1456
1457 TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_m) {
1458 TEST_REQUIRES_X86_AVX2;
1459 for (uint32_t m = 1; m <= 1; m++) {
1460 GemmMicrokernelTester()
1461 .extended_weights(true)
1462 .mr(1)
1463 .nr(8)
1464 .kr(8)
1465 .sr(1)
1466 .m(m)
1467 .n(8)
1468 .k(8)
1469 .iterations(1)
1470 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1471 }
1472 }
1473
1474 TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_n) {
1475 TEST_REQUIRES_X86_AVX2;
1476 for (uint32_t n = 1; n <= 8; n++) {
1477 GemmMicrokernelTester()
1478 .extended_weights(true)
1479 .mr(1)
1480 .nr(8)
1481 .kr(8)
1482 .sr(1)
1483 .m(1)
1484 .n(n)
1485 .k(8)
1486 .iterations(1)
1487 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1488 }
1489 }
1490
1491 TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_lt_8) {
1492 TEST_REQUIRES_X86_AVX2;
1493 for (size_t k = 1; k < 8; k++) {
1494 GemmMicrokernelTester()
1495 .extended_weights(true)
1496 .mr(1)
1497 .nr(8)
1498 .kr(8)
1499 .sr(1)
1500 .m(1)
1501 .n(8)
1502 .k(k)
1503 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1504 }
1505 }
1506
1507 TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_lt_8_strided_a) {
1508 TEST_REQUIRES_X86_AVX2;
1509 for (size_t k = 1; k < 8; k++) {
1510 GemmMicrokernelTester()
1511 .extended_weights(true)
1512 .mr(1)
1513 .nr(8)
1514 .kr(8)
1515 .sr(1)
1516 .m(1)
1517 .n(8)
1518 .k(k)
1519 .a_stride(11)
1520 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1521 }
1522 }
1523
1524 TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_lt_8_subtile) {
1525 TEST_REQUIRES_X86_AVX2;
1526 for (size_t k = 1; k < 8; k++) {
1527 for (uint32_t m = 1; m <= 1; m++) {
1528 for (uint32_t n = 1; n <= 8; n++) {
1529 GemmMicrokernelTester()
1530 .extended_weights(true)
1531 .mr(1)
1532 .nr(8)
1533 .kr(8)
1534 .sr(1)
1535 .m(m)
1536 .n(n)
1537 .k(k)
1538 .iterations(1)
1539 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1540 }
1541 }
1542 }
1543 }
1544
1545 TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_gt_8) {
1546 TEST_REQUIRES_X86_AVX2;
1547 for (size_t k = 9; k < 16; k++) {
1548 GemmMicrokernelTester()
1549 .extended_weights(true)
1550 .mr(1)
1551 .nr(8)
1552 .kr(8)
1553 .sr(1)
1554 .m(1)
1555 .n(8)
1556 .k(k)
1557 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1558 }
1559 }
1560
1561 TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_gt_8_strided_a) {
1562 TEST_REQUIRES_X86_AVX2;
1563 for (size_t k = 9; k < 16; k++) {
1564 GemmMicrokernelTester()
1565 .extended_weights(true)
1566 .mr(1)
1567 .nr(8)
1568 .kr(8)
1569 .sr(1)
1570 .m(1)
1571 .n(8)
1572 .k(k)
1573 .a_stride(19)
1574 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1575 }
1576 }
1577
1578 TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_gt_8_subtile) {
1579 TEST_REQUIRES_X86_AVX2;
1580 for (size_t k = 9; k < 16; k++) {
1581 for (uint32_t m = 1; m <= 1; m++) {
1582 for (uint32_t n = 1; n <= 8; n++) {
1583 GemmMicrokernelTester()
1584 .extended_weights(true)
1585 .mr(1)
1586 .nr(8)
1587 .kr(8)
1588 .sr(1)
1589 .m(m)
1590 .n(n)
1591 .k(k)
1592 .iterations(1)
1593 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1594 }
1595 }
1596 }
1597 }
1598
1599 TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_div_8) {
1600 TEST_REQUIRES_X86_AVX2;
1601 for (size_t k = 16; k <= 80; k += 8) {
1602 GemmMicrokernelTester()
1603 .extended_weights(true)
1604 .mr(1)
1605 .nr(8)
1606 .kr(8)
1607 .sr(1)
1608 .m(1)
1609 .n(8)
1610 .k(k)
1611 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1612 }
1613 }
1614
1615 TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_div_8_strided_a) {
1616 TEST_REQUIRES_X86_AVX2;
1617 for (size_t k = 16; k <= 80; k += 8) {
1618 GemmMicrokernelTester()
1619 .extended_weights(true)
1620 .mr(1)
1621 .nr(8)
1622 .kr(8)
1623 .sr(1)
1624 .m(1)
1625 .n(8)
1626 .k(k)
1627 .a_stride(83)
1628 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1629 }
1630 }
1631
1632 TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_div_8_subtile) {
1633 TEST_REQUIRES_X86_AVX2;
1634 for (size_t k = 16; k <= 80; k += 8) {
1635 for (uint32_t m = 1; m <= 1; m++) {
1636 for (uint32_t n = 1; n <= 8; n++) {
1637 GemmMicrokernelTester()
1638 .extended_weights(true)
1639 .mr(1)
1640 .nr(8)
1641 .kr(8)
1642 .sr(1)
1643 .m(m)
1644 .n(n)
1645 .k(k)
1646 .iterations(1)
1647 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1648 }
1649 }
1650 }
1651 }
1652
1653 TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, n_gt_8) {
1654 TEST_REQUIRES_X86_AVX2;
1655 for (uint32_t n = 9; n < 16; n++) {
1656 for (size_t k = 1; k <= 40; k += 9) {
1657 GemmMicrokernelTester()
1658 .extended_weights(true)
1659 .mr(1)
1660 .nr(8)
1661 .kr(8)
1662 .sr(1)
1663 .m(1)
1664 .n(8)
1665 .k(k)
1666 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1667 }
1668 }
1669 }
1670
1671 TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, n_gt_8_strided_cn) {
1672 TEST_REQUIRES_X86_AVX2;
1673 for (uint32_t n = 9; n < 16; n++) {
1674 for (size_t k = 1; k <= 40; k += 9) {
1675 GemmMicrokernelTester()
1676 .extended_weights(true)
1677 .mr(1)
1678 .nr(8)
1679 .kr(8)
1680 .sr(1)
1681 .m(1)
1682 .n(8)
1683 .k(k)
1684 .cn_stride(11)
1685 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1686 }
1687 }
1688 }
1689
1690 TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, n_gt_8_strided_a) {
1691 TEST_REQUIRES_X86_AVX2;
1692 for (uint32_t n = 9; n < 16; n++) {
1693 for (size_t k = 1; k <= 40; k += 9) {
1694 GemmMicrokernelTester()
1695 .extended_weights(true)
1696 .mr(1)
1697 .nr(8)
1698 .kr(8)
1699 .sr(1)
1700 .m(1)
1701 .n(n)
1702 .k(k)
1703 .a_stride(43)
1704 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1705 }
1706 }
1707 }
1708
1709 TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, n_gt_8_subtile) {
1710 TEST_REQUIRES_X86_AVX2;
1711 for (uint32_t n = 9; n < 16; n++) {
1712 for (size_t k = 1; k <= 40; k += 9) {
1713 for (uint32_t m = 1; m <= 1; m++) {
1714 GemmMicrokernelTester()
1715 .extended_weights(true)
1716 .mr(1)
1717 .nr(8)
1718 .kr(8)
1719 .sr(1)
1720 .m(m)
1721 .n(n)
1722 .k(k)
1723 .iterations(1)
1724 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1725 }
1726 }
1727 }
1728 }
1729
1730 TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, n_div_8) {
1731 TEST_REQUIRES_X86_AVX2;
1732 for (uint32_t n = 16; n <= 24; n += 8) {
1733 for (size_t k = 1; k <= 40; k += 9) {
1734 GemmMicrokernelTester()
1735 .extended_weights(true)
1736 .mr(1)
1737 .nr(8)
1738 .kr(8)
1739 .sr(1)
1740 .m(1)
1741 .n(8)
1742 .k(k)
1743 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1744 }
1745 }
1746 }
1747
1748 TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, n_div_8_strided_cn) {
1749 TEST_REQUIRES_X86_AVX2;
1750 for (uint32_t n = 16; n <= 24; n += 8) {
1751 for (size_t k = 1; k <= 40; k += 9) {
1752 GemmMicrokernelTester()
1753 .extended_weights(true)
1754 .mr(1)
1755 .nr(8)
1756 .kr(8)
1757 .sr(1)
1758 .m(1)
1759 .n(n)
1760 .k(k)
1761 .cn_stride(11)
1762 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1763 }
1764 }
1765 }
1766
1767 TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, n_div_8_strided_a) {
1768 TEST_REQUIRES_X86_AVX2;
1769 for (uint32_t n = 16; n <= 24; n += 8) {
1770 for (size_t k = 1; k <= 40; k += 9) {
1771 GemmMicrokernelTester()
1772 .extended_weights(true)
1773 .mr(1)
1774 .nr(8)
1775 .kr(8)
1776 .sr(1)
1777 .m(1)
1778 .n(n)
1779 .k(k)
1780 .a_stride(43)
1781 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1782 }
1783 }
1784 }
1785
1786 TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, n_div_8_subtile) {
1787 TEST_REQUIRES_X86_AVX2;
1788 for (uint32_t n = 16; n <= 24; n += 8) {
1789 for (size_t k = 1; k <= 40; k += 9) {
1790 for (uint32_t m = 1; m <= 1; m++) {
1791 GemmMicrokernelTester()
1792 .extended_weights(true)
1793 .mr(1)
1794 .nr(8)
1795 .kr(8)
1796 .sr(1)
1797 .m(m)
1798 .n(n)
1799 .k(k)
1800 .iterations(1)
1801 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1802 }
1803 }
1804 }
1805 }
1806
1807 TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, strided_cm_subtile) {
1808 TEST_REQUIRES_X86_AVX2;
1809 for (size_t k = 1; k <= 40; k += 9) {
1810 for (uint32_t m = 1; m <= 1; m++) {
1811 for (uint32_t n = 1; n <= 8; n++) {
1812 GemmMicrokernelTester()
1813 .extended_weights(true)
1814 .mr(1)
1815 .nr(8)
1816 .kr(8)
1817 .sr(1)
1818 .m(m)
1819 .n(n)
1820 .k(k)
1821 .cm_stride(11)
1822 .iterations(1)
1823 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1824 }
1825 }
1826 }
1827 }
1828
1829 TEST(QC8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, strided_cm) {
1830 TEST_REQUIRES_X86_AVX2;
1831 GemmMicrokernelTester()
1832 .extended_weights(true)
1833 .mr(1)
1834 .nr(8)
1835 .kr(8)
1836 .sr(1)
1837 .m(1)
1838 .n(8)
1839 .k(8)
1840 .cm_stride(11)
1841 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1842 }
1843#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1844
1845
1846#if XNN_ARCH_X86 || XNN_ARCH_X86_64
1847 TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_eq_8) {
1848 TEST_REQUIRES_X86_AVX2;
1849 GemmMicrokernelTester()
1850 .extended_weights(true)
1851 .mr(2)
1852 .nr(8)
1853 .kr(8)
1854 .sr(1)
1855 .m(2)
1856 .n(8)
1857 .k(8)
1858 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1859 }
1860
1861 TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, strided_cn) {
1862 TEST_REQUIRES_X86_AVX2;
1863 GemmMicrokernelTester()
1864 .extended_weights(true)
1865 .mr(2)
1866 .nr(8)
1867 .kr(8)
1868 .sr(1)
1869 .m(2)
1870 .n(8)
1871 .k(8)
1872 .cn_stride(11)
1873 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1874 }
1875
1876 TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_eq_8_strided_a) {
1877 TEST_REQUIRES_X86_AVX2;
1878 GemmMicrokernelTester()
1879 .extended_weights(true)
1880 .mr(2)
1881 .nr(8)
1882 .kr(8)
1883 .sr(1)
1884 .m(2)
1885 .n(8)
1886 .k(8)
1887 .a_stride(11)
1888 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1889 }
1890
1891 TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile) {
1892 TEST_REQUIRES_X86_AVX2;
1893 for (uint32_t m = 1; m <= 2; m++) {
1894 for (uint32_t n = 1; n <= 8; n++) {
1895 GemmMicrokernelTester()
1896 .extended_weights(true)
1897 .mr(2)
1898 .nr(8)
1899 .kr(8)
1900 .sr(1)
1901 .m(m)
1902 .n(n)
1903 .k(8)
1904 .iterations(1)
1905 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1906 }
1907 }
1908 }
1909
1910 TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_m) {
1911 TEST_REQUIRES_X86_AVX2;
1912 for (uint32_t m = 1; m <= 2; m++) {
1913 GemmMicrokernelTester()
1914 .extended_weights(true)
1915 .mr(2)
1916 .nr(8)
1917 .kr(8)
1918 .sr(1)
1919 .m(m)
1920 .n(8)
1921 .k(8)
1922 .iterations(1)
1923 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1924 }
1925 }
1926
1927 TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_n) {
1928 TEST_REQUIRES_X86_AVX2;
1929 for (uint32_t n = 1; n <= 8; n++) {
1930 GemmMicrokernelTester()
1931 .extended_weights(true)
1932 .mr(2)
1933 .nr(8)
1934 .kr(8)
1935 .sr(1)
1936 .m(2)
1937 .n(n)
1938 .k(8)
1939 .iterations(1)
1940 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1941 }
1942 }
1943
1944 TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_lt_8) {
1945 TEST_REQUIRES_X86_AVX2;
1946 for (size_t k = 1; k < 8; k++) {
1947 GemmMicrokernelTester()
1948 .extended_weights(true)
1949 .mr(2)
1950 .nr(8)
1951 .kr(8)
1952 .sr(1)
1953 .m(2)
1954 .n(8)
1955 .k(k)
1956 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1957 }
1958 }
1959
1960 TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_lt_8_strided_a) {
1961 TEST_REQUIRES_X86_AVX2;
1962 for (size_t k = 1; k < 8; k++) {
1963 GemmMicrokernelTester()
1964 .extended_weights(true)
1965 .mr(2)
1966 .nr(8)
1967 .kr(8)
1968 .sr(1)
1969 .m(2)
1970 .n(8)
1971 .k(k)
1972 .a_stride(11)
1973 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1974 }
1975 }
1976
1977 TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_lt_8_subtile) {
1978 TEST_REQUIRES_X86_AVX2;
1979 for (size_t k = 1; k < 8; k++) {
1980 for (uint32_t m = 1; m <= 2; m++) {
1981 for (uint32_t n = 1; n <= 8; n++) {
1982 GemmMicrokernelTester()
1983 .extended_weights(true)
1984 .mr(2)
1985 .nr(8)
1986 .kr(8)
1987 .sr(1)
1988 .m(m)
1989 .n(n)
1990 .k(k)
1991 .iterations(1)
1992 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
1993 }
1994 }
1995 }
1996 }
1997
1998 TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_gt_8) {
1999 TEST_REQUIRES_X86_AVX2;
2000 for (size_t k = 9; k < 16; k++) {
2001 GemmMicrokernelTester()
2002 .extended_weights(true)
2003 .mr(2)
2004 .nr(8)
2005 .kr(8)
2006 .sr(1)
2007 .m(2)
2008 .n(8)
2009 .k(k)
2010 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2011 }
2012 }
2013
2014 TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_gt_8_strided_a) {
2015 TEST_REQUIRES_X86_AVX2;
2016 for (size_t k = 9; k < 16; k++) {
2017 GemmMicrokernelTester()
2018 .extended_weights(true)
2019 .mr(2)
2020 .nr(8)
2021 .kr(8)
2022 .sr(1)
2023 .m(2)
2024 .n(8)
2025 .k(k)
2026 .a_stride(19)
2027 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2028 }
2029 }
2030
2031 TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_gt_8_subtile) {
2032 TEST_REQUIRES_X86_AVX2;
2033 for (size_t k = 9; k < 16; k++) {
2034 for (uint32_t m = 1; m <= 2; m++) {
2035 for (uint32_t n = 1; n <= 8; n++) {
2036 GemmMicrokernelTester()
2037 .extended_weights(true)
2038 .mr(2)
2039 .nr(8)
2040 .kr(8)
2041 .sr(1)
2042 .m(m)
2043 .n(n)
2044 .k(k)
2045 .iterations(1)
2046 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2047 }
2048 }
2049 }
2050 }
2051
2052 TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_div_8) {
2053 TEST_REQUIRES_X86_AVX2;
2054 for (size_t k = 16; k <= 80; k += 8) {
2055 GemmMicrokernelTester()
2056 .extended_weights(true)
2057 .mr(2)
2058 .nr(8)
2059 .kr(8)
2060 .sr(1)
2061 .m(2)
2062 .n(8)
2063 .k(k)
2064 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2065 }
2066 }
2067
2068 TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_div_8_strided_a) {
2069 TEST_REQUIRES_X86_AVX2;
2070 for (size_t k = 16; k <= 80; k += 8) {
2071 GemmMicrokernelTester()
2072 .extended_weights(true)
2073 .mr(2)
2074 .nr(8)
2075 .kr(8)
2076 .sr(1)
2077 .m(2)
2078 .n(8)
2079 .k(k)
2080 .a_stride(83)
2081 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2082 }
2083 }
2084
2085 TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, k_div_8_subtile) {
2086 TEST_REQUIRES_X86_AVX2;
2087 for (size_t k = 16; k <= 80; k += 8) {
2088 for (uint32_t m = 1; m <= 2; m++) {
2089 for (uint32_t n = 1; n <= 8; n++) {
2090 GemmMicrokernelTester()
2091 .extended_weights(true)
2092 .mr(2)
2093 .nr(8)
2094 .kr(8)
2095 .sr(1)
2096 .m(m)
2097 .n(n)
2098 .k(k)
2099 .iterations(1)
2100 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2101 }
2102 }
2103 }
2104 }
2105
2106 TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, n_gt_8) {
2107 TEST_REQUIRES_X86_AVX2;
2108 for (uint32_t n = 9; n < 16; n++) {
2109 for (size_t k = 1; k <= 40; k += 9) {
2110 GemmMicrokernelTester()
2111 .extended_weights(true)
2112 .mr(2)
2113 .nr(8)
2114 .kr(8)
2115 .sr(1)
2116 .m(2)
2117 .n(8)
2118 .k(k)
2119 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2120 }
2121 }
2122 }
2123
2124 TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, n_gt_8_strided_cn) {
2125 TEST_REQUIRES_X86_AVX2;
2126 for (uint32_t n = 9; n < 16; n++) {
2127 for (size_t k = 1; k <= 40; k += 9) {
2128 GemmMicrokernelTester()
2129 .extended_weights(true)
2130 .mr(2)
2131 .nr(8)
2132 .kr(8)
2133 .sr(1)
2134 .m(2)
2135 .n(8)
2136 .k(k)
2137 .cn_stride(11)
2138 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2139 }
2140 }
2141 }
2142
2143 TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, n_gt_8_strided_a) {
2144 TEST_REQUIRES_X86_AVX2;
2145 for (uint32_t n = 9; n < 16; n++) {
2146 for (size_t k = 1; k <= 40; k += 9) {
2147 GemmMicrokernelTester()
2148 .extended_weights(true)
2149 .mr(2)
2150 .nr(8)
2151 .kr(8)
2152 .sr(1)
2153 .m(2)
2154 .n(n)
2155 .k(k)
2156 .a_stride(43)
2157 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2158 }
2159 }
2160 }
2161
2162 TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, n_gt_8_subtile) {
2163 TEST_REQUIRES_X86_AVX2;
2164 for (uint32_t n = 9; n < 16; n++) {
2165 for (size_t k = 1; k <= 40; k += 9) {
2166 for (uint32_t m = 1; m <= 2; m++) {
2167 GemmMicrokernelTester()
2168 .extended_weights(true)
2169 .mr(2)
2170 .nr(8)
2171 .kr(8)
2172 .sr(1)
2173 .m(m)
2174 .n(n)
2175 .k(k)
2176 .iterations(1)
2177 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2178 }
2179 }
2180 }
2181 }
2182
2183 TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, n_div_8) {
2184 TEST_REQUIRES_X86_AVX2;
2185 for (uint32_t n = 16; n <= 24; n += 8) {
2186 for (size_t k = 1; k <= 40; k += 9) {
2187 GemmMicrokernelTester()
2188 .extended_weights(true)
2189 .mr(2)
2190 .nr(8)
2191 .kr(8)
2192 .sr(1)
2193 .m(2)
2194 .n(8)
2195 .k(k)
2196 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2197 }
2198 }
2199 }
2200
2201 TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, n_div_8_strided_cn) {
2202 TEST_REQUIRES_X86_AVX2;
2203 for (uint32_t n = 16; n <= 24; n += 8) {
2204 for (size_t k = 1; k <= 40; k += 9) {
2205 GemmMicrokernelTester()
2206 .extended_weights(true)
2207 .mr(2)
2208 .nr(8)
2209 .kr(8)
2210 .sr(1)
2211 .m(2)
2212 .n(n)
2213 .k(k)
2214 .cn_stride(11)
2215 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2216 }
2217 }
2218 }
2219
2220 TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, n_div_8_strided_a) {
2221 TEST_REQUIRES_X86_AVX2;
2222 for (uint32_t n = 16; n <= 24; n += 8) {
2223 for (size_t k = 1; k <= 40; k += 9) {
2224 GemmMicrokernelTester()
2225 .extended_weights(true)
2226 .mr(2)
2227 .nr(8)
2228 .kr(8)
2229 .sr(1)
2230 .m(2)
2231 .n(n)
2232 .k(k)
2233 .a_stride(43)
2234 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2235 }
2236 }
2237 }
2238
2239 TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, n_div_8_subtile) {
2240 TEST_REQUIRES_X86_AVX2;
2241 for (uint32_t n = 16; n <= 24; n += 8) {
2242 for (size_t k = 1; k <= 40; k += 9) {
2243 for (uint32_t m = 1; m <= 2; m++) {
2244 GemmMicrokernelTester()
2245 .extended_weights(true)
2246 .mr(2)
2247 .nr(8)
2248 .kr(8)
2249 .sr(1)
2250 .m(m)
2251 .n(n)
2252 .k(k)
2253 .iterations(1)
2254 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2255 }
2256 }
2257 }
2258 }
2259
2260 TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, strided_cm_subtile) {
2261 TEST_REQUIRES_X86_AVX2;
2262 for (size_t k = 1; k <= 40; k += 9) {
2263 for (uint32_t m = 1; m <= 2; m++) {
2264 for (uint32_t n = 1; n <= 8; n++) {
2265 GemmMicrokernelTester()
2266 .extended_weights(true)
2267 .mr(2)
2268 .nr(8)
2269 .kr(8)
2270 .sr(1)
2271 .m(m)
2272 .n(n)
2273 .k(k)
2274 .cm_stride(11)
2275 .iterations(1)
2276 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2277 }
2278 }
2279 }
2280 }
2281
2282 TEST(QC8_GEMM_XW_MINMAX_FP32_2X8C8__AVX2, strided_cm) {
2283 TEST_REQUIRES_X86_AVX2;
2284 GemmMicrokernelTester()
2285 .extended_weights(true)
2286 .mr(2)
2287 .nr(8)
2288 .kr(8)
2289 .sr(1)
2290 .m(2)
2291 .n(8)
2292 .k(8)
2293 .cm_stride(11)
2294 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2295 }
2296#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2297
2298
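// QC8 GEMM with extended weights ("xw"), 3x8 tile, AVX2. The
// .extended_weights(true) setting selects the pre-extended weight layout used
// by the xnn_qc8_gemm_xw_* microkernels; the case matrix below mirrors the
// 2x8 group above with MR raised to 3.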
2299#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2300 TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_eq_8) {
2301 TEST_REQUIRES_X86_AVX2;
2302 GemmMicrokernelTester()
2303 .extended_weights(true)
2304 .mr(3)
2305 .nr(8)
2306 .kr(8)
2307 .sr(1)
2308 .m(3)
2309 .n(8)
2310 .k(8)
2311 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2312 }
2313
2314 TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, strided_cn) {
2315 TEST_REQUIRES_X86_AVX2;
2316 GemmMicrokernelTester()
2317 .extended_weights(true)
2318 .mr(3)
2319 .nr(8)
2320 .kr(8)
2321 .sr(1)
2322 .m(3)
2323 .n(8)
2324 .k(8)
2325 .cn_stride(11)
2326 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2327 }
2328
2329 TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_eq_8_strided_a) {
2330 TEST_REQUIRES_X86_AVX2;
2331 GemmMicrokernelTester()
2332 .extended_weights(true)
2333 .mr(3)
2334 .nr(8)
2335 .kr(8)
2336 .sr(1)
2337 .m(3)
2338 .n(8)
2339 .k(8)
2340 .a_stride(11)
2341 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2342 }
2343
2344 TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile) {
2345 TEST_REQUIRES_X86_AVX2;
2346 for (uint32_t m = 1; m <= 3; m++) {
2347 for (uint32_t n = 1; n <= 8; n++) {
2348 GemmMicrokernelTester()
2349 .extended_weights(true)
2350 .mr(3)
2351 .nr(8)
2352 .kr(8)
2353 .sr(1)
2354 .m(m)
2355 .n(n)
2356 .k(8)
2357 .iterations(1)
2358 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2359 }
2360 }
2361 }
2362
2363 TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_m) {
2364 TEST_REQUIRES_X86_AVX2;
2365 for (uint32_t m = 1; m <= 3; m++) {
2366 GemmMicrokernelTester()
2367 .extended_weights(true)
2368 .mr(3)
2369 .nr(8)
2370 .kr(8)
2371 .sr(1)
2372 .m(m)
2373 .n(8)
2374 .k(8)
2375 .iterations(1)
2376 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2377 }
2378 }
2379
2380 TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_n) {
2381 TEST_REQUIRES_X86_AVX2;
2382 for (uint32_t n = 1; n <= 8; n++) {
2383 GemmMicrokernelTester()
2384 .extended_weights(true)
2385 .mr(3)
2386 .nr(8)
2387 .kr(8)
2388 .sr(1)
2389 .m(3)
2390 .n(n)
2391 .k(8)
2392 .iterations(1)
2393 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2394 }
2395 }
2396
2397 TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_lt_8) {
2398 TEST_REQUIRES_X86_AVX2;
2399 for (size_t k = 1; k < 8; k++) {
2400 GemmMicrokernelTester()
2401 .extended_weights(true)
2402 .mr(3)
2403 .nr(8)
2404 .kr(8)
2405 .sr(1)
2406 .m(3)
2407 .n(8)
2408 .k(k)
2409 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2410 }
2411 }
2412
2413 TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_lt_8_strided_a) {
2414 TEST_REQUIRES_X86_AVX2;
2415 for (size_t k = 1; k < 8; k++) {
2416 GemmMicrokernelTester()
2417 .extended_weights(true)
2418 .mr(3)
2419 .nr(8)
2420 .kr(8)
2421 .sr(1)
2422 .m(3)
2423 .n(8)
2424 .k(k)
2425 .a_stride(11)
2426 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2427 }
2428 }
2429
2430 TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_lt_8_subtile) {
2431 TEST_REQUIRES_X86_AVX2;
2432 for (size_t k = 1; k < 8; k++) {
2433 for (uint32_t m = 1; m <= 3; m++) {
2434 for (uint32_t n = 1; n <= 8; n++) {
2435 GemmMicrokernelTester()
2436 .extended_weights(true)
2437 .mr(3)
2438 .nr(8)
2439 .kr(8)
2440 .sr(1)
2441 .m(m)
2442 .n(n)
2443 .k(k)
2444 .iterations(1)
2445 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2446 }
2447 }
2448 }
2449 }
2450
2451 TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_gt_8) {
2452 TEST_REQUIRES_X86_AVX2;
2453 for (size_t k = 9; k < 16; k++) {
2454 GemmMicrokernelTester()
2455 .extended_weights(true)
2456 .mr(3)
2457 .nr(8)
2458 .kr(8)
2459 .sr(1)
2460 .m(3)
2461 .n(8)
2462 .k(k)
2463 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2464 }
2465 }
2466
2467 TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_gt_8_strided_a) {
2468 TEST_REQUIRES_X86_AVX2;
2469 for (size_t k = 9; k < 16; k++) {
2470 GemmMicrokernelTester()
2471 .extended_weights(true)
2472 .mr(3)
2473 .nr(8)
2474 .kr(8)
2475 .sr(1)
2476 .m(3)
2477 .n(8)
2478 .k(k)
2479 .a_stride(19)
2480 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2481 }
2482 }
2483
2484 TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_gt_8_subtile) {
2485 TEST_REQUIRES_X86_AVX2;
2486 for (size_t k = 9; k < 16; k++) {
2487 for (uint32_t m = 1; m <= 3; m++) {
2488 for (uint32_t n = 1; n <= 8; n++) {
2489 GemmMicrokernelTester()
2490 .extended_weights(true)
2491 .mr(3)
2492 .nr(8)
2493 .kr(8)
2494 .sr(1)
2495 .m(m)
2496 .n(n)
2497 .k(k)
2498 .iterations(1)
2499 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2500 }
2501 }
2502 }
2503 }
2504
2505 TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_div_8) {
2506 TEST_REQUIRES_X86_AVX2;
2507 for (size_t k = 16; k <= 80; k += 8) {
2508 GemmMicrokernelTester()
2509 .extended_weights(true)
2510 .mr(3)
2511 .nr(8)
2512 .kr(8)
2513 .sr(1)
2514 .m(3)
2515 .n(8)
2516 .k(k)
2517 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2518 }
2519 }
2520
2521 TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_div_8_strided_a) {
2522 TEST_REQUIRES_X86_AVX2;
2523 for (size_t k = 16; k <= 80; k += 8) {
2524 GemmMicrokernelTester()
2525 .extended_weights(true)
2526 .mr(3)
2527 .nr(8)
2528 .kr(8)
2529 .sr(1)
2530 .m(3)
2531 .n(8)
2532 .k(k)
2533 .a_stride(83)
2534 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2535 }
2536 }
2537
2538 TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, k_div_8_subtile) {
2539 TEST_REQUIRES_X86_AVX2;
2540 for (size_t k = 16; k <= 80; k += 8) {
2541 for (uint32_t m = 1; m <= 3; m++) {
2542 for (uint32_t n = 1; n <= 8; n++) {
2543 GemmMicrokernelTester()
2544 .extended_weights(true)
2545 .mr(3)
2546 .nr(8)
2547 .kr(8)
2548 .sr(1)
2549 .m(m)
2550 .n(n)
2551 .k(k)
2552 .iterations(1)
2553 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2554 }
2555 }
2556 }
2557 }
2558
2559 TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, n_gt_8) {
2560 TEST_REQUIRES_X86_AVX2;
2561 for (uint32_t n = 9; n < 16; n++) {
2562 for (size_t k = 1; k <= 40; k += 9) {
2563 GemmMicrokernelTester()
2564 .extended_weights(true)
2565 .mr(3)
2566 .nr(8)
2567 .kr(8)
2568 .sr(1)
2569 .m(3)
2570 .n(8)
2571 .k(k)
2572 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2573 }
2574 }
2575 }
2576
2577 TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, n_gt_8_strided_cn) {
2578 TEST_REQUIRES_X86_AVX2;
2579 for (uint32_t n = 9; n < 16; n++) {
2580 for (size_t k = 1; k <= 40; k += 9) {
2581 GemmMicrokernelTester()
2582 .extended_weights(true)
2583 .mr(3)
2584 .nr(8)
2585 .kr(8)
2586 .sr(1)
2587 .m(3)
2588 .n(8)
2589 .k(k)
2590 .cn_stride(11)
2591 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2592 }
2593 }
2594 }
2595
2596 TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, n_gt_8_strided_a) {
2597 TEST_REQUIRES_X86_AVX2;
2598 for (uint32_t n = 9; n < 16; n++) {
2599 for (size_t k = 1; k <= 40; k += 9) {
2600 GemmMicrokernelTester()
2601 .extended_weights(true)
2602 .mr(3)
2603 .nr(8)
2604 .kr(8)
2605 .sr(1)
2606 .m(3)
2607 .n(n)
2608 .k(k)
2609 .a_stride(43)
2610 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2611 }
2612 }
2613 }
2614
2615 TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, n_gt_8_subtile) {
2616 TEST_REQUIRES_X86_AVX2;
2617 for (uint32_t n = 9; n < 16; n++) {
2618 for (size_t k = 1; k <= 40; k += 9) {
2619 for (uint32_t m = 1; m <= 3; m++) {
2620 GemmMicrokernelTester()
2621 .extended_weights(true)
2622 .mr(3)
2623 .nr(8)
2624 .kr(8)
2625 .sr(1)
2626 .m(m)
2627 .n(n)
2628 .k(k)
2629 .iterations(1)
2630 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2631 }
2632 }
2633 }
2634 }
2635
2636 TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, n_div_8) {
2637 TEST_REQUIRES_X86_AVX2;
2638 for (uint32_t n = 16; n <= 24; n += 8) {
2639 for (size_t k = 1; k <= 40; k += 9) {
2640 GemmMicrokernelTester()
2641 .extended_weights(true)
2642 .mr(3)
2643 .nr(8)
2644 .kr(8)
2645 .sr(1)
2646 .m(3)
2647 .n(8)
2648 .k(k)
2649 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2650 }
2651 }
2652 }
2653
2654 TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, n_div_8_strided_cn) {
2655 TEST_REQUIRES_X86_AVX2;
2656 for (uint32_t n = 16; n <= 24; n += 8) {
2657 for (size_t k = 1; k <= 40; k += 9) {
2658 GemmMicrokernelTester()
2659 .extended_weights(true)
2660 .mr(3)
2661 .nr(8)
2662 .kr(8)
2663 .sr(1)
2664 .m(3)
2665 .n(n)
2666 .k(k)
2667 .cn_stride(11)
2668 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2669 }
2670 }
2671 }
2672
2673 TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, n_div_8_strided_a) {
2674 TEST_REQUIRES_X86_AVX2;
2675 for (uint32_t n = 16; n <= 24; n += 8) {
2676 for (size_t k = 1; k <= 40; k += 9) {
2677 GemmMicrokernelTester()
2678 .extended_weights(true)
2679 .mr(3)
2680 .nr(8)
2681 .kr(8)
2682 .sr(1)
2683 .m(3)
2684 .n(n)
2685 .k(k)
2686 .a_stride(43)
2687 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2688 }
2689 }
2690 }
2691
2692 TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, n_div_8_subtile) {
2693 TEST_REQUIRES_X86_AVX2;
2694 for (uint32_t n = 16; n <= 24; n += 8) {
2695 for (size_t k = 1; k <= 40; k += 9) {
2696 for (uint32_t m = 1; m <= 3; m++) {
2697 GemmMicrokernelTester()
2698 .extended_weights(true)
2699 .mr(3)
2700 .nr(8)
2701 .kr(8)
2702 .sr(1)
2703 .m(m)
2704 .n(n)
2705 .k(k)
2706 .iterations(1)
2707 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2708 }
2709 }
2710 }
2711 }
2712
2713 TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, strided_cm_subtile) {
2714 TEST_REQUIRES_X86_AVX2;
2715 for (size_t k = 1; k <= 40; k += 9) {
2716 for (uint32_t m = 1; m <= 3; m++) {
2717 for (uint32_t n = 1; n <= 8; n++) {
2718 GemmMicrokernelTester()
2719 .extended_weights(true)
2720 .mr(3)
2721 .nr(8)
2722 .kr(8)
2723 .sr(1)
2724 .m(m)
2725 .n(n)
2726 .k(k)
2727 .cm_stride(11)
2728 .iterations(1)
2729 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2730 }
2731 }
2732 }
2733 }
2734
2735 TEST(QC8_GEMM_XW_MINMAX_FP32_3X8C8__AVX2, strided_cm) {
2736 TEST_REQUIRES_X86_AVX2;
2737 GemmMicrokernelTester()
2738 .extended_weights(true)
2739 .mr(3)
2740 .nr(8)
2741 .kr(8)
2742 .sr(1)
2743 .m(3)
2744 .n(8)
2745 .k(8)
2746 .cm_stride(11)
2747 .Test(xnn_qc8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_minmax_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2748 }
2749#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2750
2751
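// QC8 GEMM, 1x16 tile, AVX512-SKX, fp32 requantization. From here on the
// microkernels use NR=16, so the n_gt/n_div cases probe n in (16,32) and
// {32,48}, and the cn_stride/cm_stride cases use a stride of 19.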
2752#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2753 TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8) {
2754 TEST_REQUIRES_X86_AVX512SKX;
2755 GemmMicrokernelTester()
2756 .mr(1)
2757 .nr(16)
2758 .kr(8)
2759 .sr(1)
2760 .m(1)
2761 .n(16)
2762 .k(8)
2763 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2764 }
2765
2766 TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, strided_cn) {
2767 TEST_REQUIRES_X86_AVX512SKX;
2768 GemmMicrokernelTester()
2769 .mr(1)
2770 .nr(16)
2771 .kr(8)
2772 .sr(1)
2773 .m(1)
2774 .n(16)
2775 .k(8)
2776 .cn_stride(19)
2777 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2778 }
2779
2780 TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_strided_a) {
2781 TEST_REQUIRES_X86_AVX512SKX;
2782 GemmMicrokernelTester()
2783 .mr(1)
2784 .nr(16)
2785 .kr(8)
2786 .sr(1)
2787 .m(1)
2788 .n(16)
2789 .k(8)
2790 .a_stride(11)
2791 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2792 }
2793
2794 TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_subtile) {
2795 TEST_REQUIRES_X86_AVX512SKX;
2796 for (uint32_t m = 1; m <= 1; m++) {
2797 for (uint32_t n = 1; n <= 16; n++) {
2798 GemmMicrokernelTester()
2799 .mr(1)
2800 .nr(16)
2801 .kr(8)
2802 .sr(1)
2803 .m(m)
2804 .n(n)
2805 .k(8)
2806 .iterations(1)
2807 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2808 }
2809 }
2810 }
2811
2812 TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_subtile_m) {
2813 TEST_REQUIRES_X86_AVX512SKX;
2814 for (uint32_t m = 1; m <= 1; m++) {
2815 GemmMicrokernelTester()
2816 .mr(1)
2817 .nr(16)
2818 .kr(8)
2819 .sr(1)
2820 .m(m)
2821 .n(16)
2822 .k(8)
2823 .iterations(1)
2824 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2825 }
2826 }
2827
2828 TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_subtile_n) {
2829 TEST_REQUIRES_X86_AVX512SKX;
2830 for (uint32_t n = 1; n <= 16; n++) {
2831 GemmMicrokernelTester()
2832 .mr(1)
2833 .nr(16)
2834 .kr(8)
2835 .sr(1)
2836 .m(1)
2837 .n(n)
2838 .k(8)
2839 .iterations(1)
2840 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2841 }
2842 }
2843
2844 TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_lt_8) {
2845 TEST_REQUIRES_X86_AVX512SKX;
2846 for (size_t k = 1; k < 8; k++) {
2847 GemmMicrokernelTester()
2848 .mr(1)
2849 .nr(16)
2850 .kr(8)
2851 .sr(1)
2852 .m(1)
2853 .n(16)
2854 .k(k)
2855 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2856 }
2857 }
2858
2859 TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_lt_8_strided_a) {
2860 TEST_REQUIRES_X86_AVX512SKX;
2861 for (size_t k = 1; k < 8; k++) {
2862 GemmMicrokernelTester()
2863 .mr(1)
2864 .nr(16)
2865 .kr(8)
2866 .sr(1)
2867 .m(1)
2868 .n(16)
2869 .k(k)
2870 .a_stride(11)
2871 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2872 }
2873 }
2874
2875 TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_lt_8_subtile) {
2876 TEST_REQUIRES_X86_AVX512SKX;
2877 for (size_t k = 1; k < 8; k++) {
2878 for (uint32_t m = 1; m <= 1; m++) {
2879 for (uint32_t n = 1; n <= 16; n++) {
2880 GemmMicrokernelTester()
2881 .mr(1)
2882 .nr(16)
2883 .kr(8)
2884 .sr(1)
2885 .m(m)
2886 .n(n)
2887 .k(k)
2888 .iterations(1)
2889 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2890 }
2891 }
2892 }
2893 }
2894
2895 TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_gt_8) {
2896 TEST_REQUIRES_X86_AVX512SKX;
2897 for (size_t k = 9; k < 16; k++) {
2898 GemmMicrokernelTester()
2899 .mr(1)
2900 .nr(16)
2901 .kr(8)
2902 .sr(1)
2903 .m(1)
2904 .n(16)
2905 .k(k)
2906 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2907 }
2908 }
2909
2910 TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_gt_8_strided_a) {
2911 TEST_REQUIRES_X86_AVX512SKX;
2912 for (size_t k = 9; k < 16; k++) {
2913 GemmMicrokernelTester()
2914 .mr(1)
2915 .nr(16)
2916 .kr(8)
2917 .sr(1)
2918 .m(1)
2919 .n(16)
2920 .k(k)
2921 .a_stride(19)
2922 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2923 }
2924 }
2925
2926 TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_gt_8_subtile) {
2927 TEST_REQUIRES_X86_AVX512SKX;
2928 for (size_t k = 9; k < 16; k++) {
2929 for (uint32_t m = 1; m <= 1; m++) {
2930 for (uint32_t n = 1; n <= 16; n++) {
2931 GemmMicrokernelTester()
2932 .mr(1)
2933 .nr(16)
2934 .kr(8)
2935 .sr(1)
2936 .m(m)
2937 .n(n)
2938 .k(k)
2939 .iterations(1)
2940 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2941 }
2942 }
2943 }
2944 }
2945
2946 TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_div_8) {
2947 TEST_REQUIRES_X86_AVX512SKX;
2948 for (size_t k = 16; k <= 80; k += 8) {
2949 GemmMicrokernelTester()
2950 .mr(1)
2951 .nr(16)
2952 .kr(8)
2953 .sr(1)
2954 .m(1)
2955 .n(16)
2956 .k(k)
2957 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2958 }
2959 }
2960
2961 TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_div_8_strided_a) {
2962 TEST_REQUIRES_X86_AVX512SKX;
2963 for (size_t k = 16; k <= 80; k += 8) {
2964 GemmMicrokernelTester()
2965 .mr(1)
2966 .nr(16)
2967 .kr(8)
2968 .sr(1)
2969 .m(1)
2970 .n(16)
2971 .k(k)
2972 .a_stride(83)
2973 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2974 }
2975 }
2976
2977 TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_div_8_subtile) {
2978 TEST_REQUIRES_X86_AVX512SKX;
2979 for (size_t k = 16; k <= 80; k += 8) {
2980 for (uint32_t m = 1; m <= 1; m++) {
2981 for (uint32_t n = 1; n <= 16; n++) {
2982 GemmMicrokernelTester()
2983 .mr(1)
2984 .nr(16)
2985 .kr(8)
2986 .sr(1)
2987 .m(m)
2988 .n(n)
2989 .k(k)
2990 .iterations(1)
2991 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
2992 }
2993 }
2994 }
2995 }
2996
2997 TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16) {
2998 TEST_REQUIRES_X86_AVX512SKX;
2999 for (uint32_t n = 17; n < 32; n++) {
3000 for (size_t k = 1; k <= 40; k += 9) {
3001 GemmMicrokernelTester()
3002 .mr(1)
3003 .nr(16)
3004 .kr(8)
3005 .sr(1)
3006 .m(1)
3007 .n(16)
3008 .k(k)
3009 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3010 }
3011 }
3012 }
3013
3014 TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16_strided_cn) {
3015 TEST_REQUIRES_X86_AVX512SKX;
3016 for (uint32_t n = 17; n < 32; n++) {
3017 for (size_t k = 1; k <= 40; k += 9) {
3018 GemmMicrokernelTester()
3019 .mr(1)
3020 .nr(16)
3021 .kr(8)
3022 .sr(1)
3023 .m(1)
3024 .n(16)
3025 .k(k)
3026 .cn_stride(19)
3027 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3028 }
3029 }
3030 }
3031
3032 TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16_strided_a) {
3033 TEST_REQUIRES_X86_AVX512SKX;
3034 for (uint32_t n = 17; n < 32; n++) {
3035 for (size_t k = 1; k <= 40; k += 9) {
3036 GemmMicrokernelTester()
3037 .mr(1)
3038 .nr(16)
3039 .kr(8)
3040 .sr(1)
3041 .m(1)
3042 .n(n)
3043 .k(k)
3044 .a_stride(43)
3045 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3046 }
3047 }
3048 }
3049
3050 TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16_subtile) {
3051 TEST_REQUIRES_X86_AVX512SKX;
3052 for (uint32_t n = 17; n < 32; n++) {
3053 for (size_t k = 1; k <= 40; k += 9) {
3054 for (uint32_t m = 1; m <= 1; m++) {
3055 GemmMicrokernelTester()
3056 .mr(1)
3057 .nr(16)
3058 .kr(8)
3059 .sr(1)
3060 .m(m)
3061 .n(n)
3062 .k(k)
3063 .iterations(1)
3064 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3065 }
3066 }
3067 }
3068 }
3069
3070 TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16) {
3071 TEST_REQUIRES_X86_AVX512SKX;
3072 for (uint32_t n = 32; n <= 48; n += 16) {
3073 for (size_t k = 1; k <= 40; k += 9) {
3074 GemmMicrokernelTester()
3075 .mr(1)
3076 .nr(16)
3077 .kr(8)
3078 .sr(1)
3079 .m(1)
3080 .n(16)
3081 .k(k)
3082 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3083 }
3084 }
3085 }
3086
3087 TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16_strided_cn) {
3088 TEST_REQUIRES_X86_AVX512SKX;
3089 for (uint32_t n = 32; n <= 48; n += 16) {
3090 for (size_t k = 1; k <= 40; k += 9) {
3091 GemmMicrokernelTester()
3092 .mr(1)
3093 .nr(16)
3094 .kr(8)
3095 .sr(1)
3096 .m(1)
3097 .n(n)
3098 .k(k)
3099 .cn_stride(19)
3100 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3101 }
3102 }
3103 }
3104
3105 TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16_strided_a) {
3106 TEST_REQUIRES_X86_AVX512SKX;
3107 for (uint32_t n = 32; n <= 48; n += 16) {
3108 for (size_t k = 1; k <= 40; k += 9) {
3109 GemmMicrokernelTester()
3110 .mr(1)
3111 .nr(16)
3112 .kr(8)
3113 .sr(1)
3114 .m(1)
3115 .n(n)
3116 .k(k)
3117 .a_stride(43)
3118 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3119 }
3120 }
3121 }
3122
3123 TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16_subtile) {
3124 TEST_REQUIRES_X86_AVX512SKX;
3125 for (uint32_t n = 32; n <= 48; n += 16) {
3126 for (size_t k = 1; k <= 40; k += 9) {
3127 for (uint32_t m = 1; m <= 1; m++) {
3128 GemmMicrokernelTester()
3129 .mr(1)
3130 .nr(16)
3131 .kr(8)
3132 .sr(1)
3133 .m(m)
3134 .n(n)
3135 .k(k)
3136 .iterations(1)
3137 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3138 }
3139 }
3140 }
3141 }
3142
3143 TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, strided_cm_subtile) {
3144 TEST_REQUIRES_X86_AVX512SKX;
3145 for (size_t k = 1; k <= 40; k += 9) {
3146 for (uint32_t m = 1; m <= 1; m++) {
3147 for (uint32_t n = 1; n <= 16; n++) {
3148 GemmMicrokernelTester()
3149 .mr(1)
3150 .nr(16)
3151 .kr(8)
3152 .sr(1)
3153 .m(m)
3154 .n(n)
3155 .k(k)
3156 .cm_stride(19)
3157 .iterations(1)
3158 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3159 }
3160 }
3161 }
3162 }
3163
3164 TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, qmin) {
3165 TEST_REQUIRES_X86_AVX512SKX;
3166 GemmMicrokernelTester()
3167 .mr(1)
3168 .nr(16)
3169 .kr(8)
3170 .sr(1)
3171 .m(1)
3172 .n(16)
3173 .k(8)
3174 .qmin(128)
3175 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3176 }
3177
3178 TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, qmax) {
3179 TEST_REQUIRES_X86_AVX512SKX;
3180 GemmMicrokernelTester()
3181 .mr(1)
3182 .nr(16)
3183 .kr(8)
3184 .sr(1)
3185 .m(1)
3186 .n(16)
3187 .k(8)
3188 .qmax(128)
3189 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3190 }
3191
3192 TEST(QC8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, strided_cm) {
3193 TEST_REQUIRES_X86_AVX512SKX;
3194 GemmMicrokernelTester()
3195 .mr(1)
3196 .nr(16)
3197 .kr(8)
3198 .sr(1)
3199 .m(1)
3200 .n(16)
3201 .k(8)
3202 .cm_stride(19)
3203 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3204 }
3205#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3206
3207
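// QC8 GEMM, 2x16 tile, AVX512-SKX: same generated case matrix as the 1x16
// group, with MR=2 so the subtile loops cover m in {1, 2}.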
3208#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3209 TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8) {
3210 TEST_REQUIRES_X86_AVX512SKX;
3211 GemmMicrokernelTester()
3212 .mr(2)
3213 .nr(16)
3214 .kr(8)
3215 .sr(1)
3216 .m(2)
3217 .n(16)
3218 .k(8)
3219 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3220 }
3221
3222 TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cn) {
3223 TEST_REQUIRES_X86_AVX512SKX;
3224 GemmMicrokernelTester()
3225 .mr(2)
3226 .nr(16)
3227 .kr(8)
3228 .sr(1)
3229 .m(2)
3230 .n(16)
3231 .k(8)
3232 .cn_stride(19)
3233 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3234 }
3235
3236 TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_strided_a) {
3237 TEST_REQUIRES_X86_AVX512SKX;
3238 GemmMicrokernelTester()
3239 .mr(2)
3240 .nr(16)
3241 .kr(8)
3242 .sr(1)
3243 .m(2)
3244 .n(16)
3245 .k(8)
3246 .a_stride(11)
3247 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3248 }
3249
3250 TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile) {
3251 TEST_REQUIRES_X86_AVX512SKX;
3252 for (uint32_t m = 1; m <= 2; m++) {
3253 for (uint32_t n = 1; n <= 16; n++) {
3254 GemmMicrokernelTester()
3255 .mr(2)
3256 .nr(16)
3257 .kr(8)
3258 .sr(1)
3259 .m(m)
3260 .n(n)
3261 .k(8)
3262 .iterations(1)
3263 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3264 }
3265 }
3266 }
3267
3268 TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile_m) {
3269 TEST_REQUIRES_X86_AVX512SKX;
3270 for (uint32_t m = 1; m <= 2; m++) {
3271 GemmMicrokernelTester()
3272 .mr(2)
3273 .nr(16)
3274 .kr(8)
3275 .sr(1)
3276 .m(m)
3277 .n(16)
3278 .k(8)
3279 .iterations(1)
3280 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3281 }
3282 }
3283
3284 TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile_n) {
3285 TEST_REQUIRES_X86_AVX512SKX;
3286 for (uint32_t n = 1; n <= 16; n++) {
3287 GemmMicrokernelTester()
3288 .mr(2)
3289 .nr(16)
3290 .kr(8)
3291 .sr(1)
3292 .m(2)
3293 .n(n)
3294 .k(8)
3295 .iterations(1)
3296 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3297 }
3298 }
3299
3300 TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_lt_8) {
3301 TEST_REQUIRES_X86_AVX512SKX;
3302 for (size_t k = 1; k < 8; k++) {
3303 GemmMicrokernelTester()
3304 .mr(2)
3305 .nr(16)
3306 .kr(8)
3307 .sr(1)
3308 .m(2)
3309 .n(16)
3310 .k(k)
3311 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3312 }
3313 }
3314
3315 TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_lt_8_strided_a) {
3316 TEST_REQUIRES_X86_AVX512SKX;
3317 for (size_t k = 1; k < 8; k++) {
3318 GemmMicrokernelTester()
3319 .mr(2)
3320 .nr(16)
3321 .kr(8)
3322 .sr(1)
3323 .m(2)
3324 .n(16)
3325 .k(k)
3326 .a_stride(11)
3327 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3328 }
3329 }
3330
3331 TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_lt_8_subtile) {
3332 TEST_REQUIRES_X86_AVX512SKX;
3333 for (size_t k = 1; k < 8; k++) {
3334 for (uint32_t m = 1; m <= 2; m++) {
3335 for (uint32_t n = 1; n <= 16; n++) {
3336 GemmMicrokernelTester()
3337 .mr(2)
3338 .nr(16)
3339 .kr(8)
3340 .sr(1)
3341 .m(m)
3342 .n(n)
3343 .k(k)
3344 .iterations(1)
3345 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3346 }
3347 }
3348 }
3349 }
3350
3351 TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_gt_8) {
3352 TEST_REQUIRES_X86_AVX512SKX;
3353 for (size_t k = 9; k < 16; k++) {
3354 GemmMicrokernelTester()
3355 .mr(2)
3356 .nr(16)
3357 .kr(8)
3358 .sr(1)
3359 .m(2)
3360 .n(16)
3361 .k(k)
3362 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3363 }
3364 }
3365
3366 TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_gt_8_strided_a) {
3367 TEST_REQUIRES_X86_AVX512SKX;
3368 for (size_t k = 9; k < 16; k++) {
3369 GemmMicrokernelTester()
3370 .mr(2)
3371 .nr(16)
3372 .kr(8)
3373 .sr(1)
3374 .m(2)
3375 .n(16)
3376 .k(k)
3377 .a_stride(19)
3378 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3379 }
3380 }
3381
3382 TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_gt_8_subtile) {
3383 TEST_REQUIRES_X86_AVX512SKX;
3384 for (size_t k = 9; k < 16; k++) {
3385 for (uint32_t m = 1; m <= 2; m++) {
3386 for (uint32_t n = 1; n <= 16; n++) {
3387 GemmMicrokernelTester()
3388 .mr(2)
3389 .nr(16)
3390 .kr(8)
3391 .sr(1)
3392 .m(m)
3393 .n(n)
3394 .k(k)
3395 .iterations(1)
3396 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3397 }
3398 }
3399 }
3400 }
3401
3402 TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_div_8) {
3403 TEST_REQUIRES_X86_AVX512SKX;
3404 for (size_t k = 16; k <= 80; k += 8) {
3405 GemmMicrokernelTester()
3406 .mr(2)
3407 .nr(16)
3408 .kr(8)
3409 .sr(1)
3410 .m(2)
3411 .n(16)
3412 .k(k)
3413 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3414 }
3415 }
3416
3417 TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_div_8_strided_a) {
3418 TEST_REQUIRES_X86_AVX512SKX;
3419 for (size_t k = 16; k <= 80; k += 8) {
3420 GemmMicrokernelTester()
3421 .mr(2)
3422 .nr(16)
3423 .kr(8)
3424 .sr(1)
3425 .m(2)
3426 .n(16)
3427 .k(k)
3428 .a_stride(83)
3429 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3430 }
3431 }
3432
3433 TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_div_8_subtile) {
3434 TEST_REQUIRES_X86_AVX512SKX;
3435 for (size_t k = 16; k <= 80; k += 8) {
3436 for (uint32_t m = 1; m <= 2; m++) {
3437 for (uint32_t n = 1; n <= 16; n++) {
3438 GemmMicrokernelTester()
3439 .mr(2)
3440 .nr(16)
3441 .kr(8)
3442 .sr(1)
3443 .m(m)
3444 .n(n)
3445 .k(k)
3446 .iterations(1)
3447 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3448 }
3449 }
3450 }
3451 }
3452
3453 TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16) {
3454 TEST_REQUIRES_X86_AVX512SKX;
3455 for (uint32_t n = 17; n < 32; n++) {
3456 for (size_t k = 1; k <= 40; k += 9) {
3457 GemmMicrokernelTester()
3458 .mr(2)
3459 .nr(16)
3460 .kr(8)
3461 .sr(1)
3462 .m(2)
3463 .n(16)
3464 .k(k)
3465 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3466 }
3467 }
3468 }
3469
3470 TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_strided_cn) {
3471 TEST_REQUIRES_X86_AVX512SKX;
3472 for (uint32_t n = 17; n < 32; n++) {
3473 for (size_t k = 1; k <= 40; k += 9) {
3474 GemmMicrokernelTester()
3475 .mr(2)
3476 .nr(16)
3477 .kr(8)
3478 .sr(1)
3479 .m(2)
3480 .n(16)
3481 .k(k)
3482 .cn_stride(19)
3483 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3484 }
3485 }
3486 }
3487
3488 TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_strided_a) {
3489 TEST_REQUIRES_X86_AVX512SKX;
3490 for (uint32_t n = 17; n < 32; n++) {
3491 for (size_t k = 1; k <= 40; k += 9) {
3492 GemmMicrokernelTester()
3493 .mr(2)
3494 .nr(16)
3495 .kr(8)
3496 .sr(1)
3497 .m(2)
3498 .n(n)
3499 .k(k)
3500 .a_stride(43)
3501 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3502 }
3503 }
3504 }
3505
3506 TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_subtile) {
3507 TEST_REQUIRES_X86_AVX512SKX;
3508 for (uint32_t n = 17; n < 32; n++) {
3509 for (size_t k = 1; k <= 40; k += 9) {
3510 for (uint32_t m = 1; m <= 2; m++) {
3511 GemmMicrokernelTester()
3512 .mr(2)
3513 .nr(16)
3514 .kr(8)
3515 .sr(1)
3516 .m(m)
3517 .n(n)
3518 .k(k)
3519 .iterations(1)
3520 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3521 }
3522 }
3523 }
3524 }
3525
3526 TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16) {
3527 TEST_REQUIRES_X86_AVX512SKX;
3528 for (uint32_t n = 32; n <= 48; n += 16) {
3529 for (size_t k = 1; k <= 40; k += 9) {
3530 GemmMicrokernelTester()
3531 .mr(2)
3532 .nr(16)
3533 .kr(8)
3534 .sr(1)
3535 .m(2)
3536 .n(16)
3537 .k(k)
3538 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3539 }
3540 }
3541 }
3542
3543 TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_strided_cn) {
3544 TEST_REQUIRES_X86_AVX512SKX;
3545 for (uint32_t n = 32; n <= 48; n += 16) {
3546 for (size_t k = 1; k <= 40; k += 9) {
3547 GemmMicrokernelTester()
3548 .mr(2)
3549 .nr(16)
3550 .kr(8)
3551 .sr(1)
3552 .m(2)
3553 .n(n)
3554 .k(k)
3555 .cn_stride(19)
3556 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3557 }
3558 }
3559 }
3560
3561 TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_strided_a) {
3562 TEST_REQUIRES_X86_AVX512SKX;
3563 for (uint32_t n = 32; n <= 48; n += 16) {
3564 for (size_t k = 1; k <= 40; k += 9) {
3565 GemmMicrokernelTester()
3566 .mr(2)
3567 .nr(16)
3568 .kr(8)
3569 .sr(1)
3570 .m(2)
3571 .n(n)
3572 .k(k)
3573 .a_stride(43)
3574 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3575 }
3576 }
3577 }
3578
3579 TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_subtile) {
3580 TEST_REQUIRES_X86_AVX512SKX;
3581 for (uint32_t n = 32; n <= 48; n += 16) {
3582 for (size_t k = 1; k <= 40; k += 9) {
3583 for (uint32_t m = 1; m <= 2; m++) {
3584 GemmMicrokernelTester()
3585 .mr(2)
3586 .nr(16)
3587 .kr(8)
3588 .sr(1)
3589 .m(m)
3590 .n(n)
3591 .k(k)
3592 .iterations(1)
3593 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3594 }
3595 }
3596 }
3597 }
3598
3599 TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cm_subtile) {
3600 TEST_REQUIRES_X86_AVX512SKX;
3601 for (size_t k = 1; k <= 40; k += 9) {
3602 for (uint32_t m = 1; m <= 2; m++) {
3603 for (uint32_t n = 1; n <= 16; n++) {
3604 GemmMicrokernelTester()
3605 .mr(2)
3606 .nr(16)
3607 .kr(8)
3608 .sr(1)
3609 .m(m)
3610 .n(n)
3611 .k(k)
3612 .cm_stride(19)
3613 .iterations(1)
3614 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3615 }
3616 }
3617 }
3618 }
3619
3620 TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, qmin) {
3621 TEST_REQUIRES_X86_AVX512SKX;
3622 GemmMicrokernelTester()
3623 .mr(2)
3624 .nr(16)
3625 .kr(8)
3626 .sr(1)
3627 .m(2)
3628 .n(16)
3629 .k(8)
3630 .qmin(128)
3631 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3632 }
3633
3634 TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, qmax) {
3635 TEST_REQUIRES_X86_AVX512SKX;
3636 GemmMicrokernelTester()
3637 .mr(2)
3638 .nr(16)
3639 .kr(8)
3640 .sr(1)
3641 .m(2)
3642 .n(16)
3643 .k(8)
3644 .qmax(128)
3645 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3646 }
3647
3648 TEST(QC8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cm) {
3649 TEST_REQUIRES_X86_AVX512SKX;
3650 GemmMicrokernelTester()
3651 .mr(2)
3652 .nr(16)
3653 .kr(8)
3654 .sr(1)
3655 .m(2)
3656 .n(16)
3657 .k(8)
3658 .cm_stride(19)
3659 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3660 }
3661#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3662
3663
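// QC8 GEMM, 3x16 tile, AVX512-SKX: MR=3, subtile loops cover m in {1, 2, 3}.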
3664#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3665 TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8) {
3666 TEST_REQUIRES_X86_AVX512SKX;
3667 GemmMicrokernelTester()
3668 .mr(3)
3669 .nr(16)
3670 .kr(8)
3671 .sr(1)
3672 .m(3)
3673 .n(16)
3674 .k(8)
3675 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3676 }
3677
3678 TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cn) {
3679 TEST_REQUIRES_X86_AVX512SKX;
3680 GemmMicrokernelTester()
3681 .mr(3)
3682 .nr(16)
3683 .kr(8)
3684 .sr(1)
3685 .m(3)
3686 .n(16)
3687 .k(8)
3688 .cn_stride(19)
3689 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3690 }
3691
3692 TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_strided_a) {
3693 TEST_REQUIRES_X86_AVX512SKX;
3694 GemmMicrokernelTester()
3695 .mr(3)
3696 .nr(16)
3697 .kr(8)
3698 .sr(1)
3699 .m(3)
3700 .n(16)
3701 .k(8)
3702 .a_stride(11)
3703 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3704 }
3705
3706 TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile) {
3707 TEST_REQUIRES_X86_AVX512SKX;
3708 for (uint32_t m = 1; m <= 3; m++) {
3709 for (uint32_t n = 1; n <= 16; n++) {
3710 GemmMicrokernelTester()
3711 .mr(3)
3712 .nr(16)
3713 .kr(8)
3714 .sr(1)
3715 .m(m)
3716 .n(n)
3717 .k(8)
3718 .iterations(1)
3719 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3720 }
3721 }
3722 }
3723
3724 TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile_m) {
3725 TEST_REQUIRES_X86_AVX512SKX;
3726 for (uint32_t m = 1; m <= 3; m++) {
3727 GemmMicrokernelTester()
3728 .mr(3)
3729 .nr(16)
3730 .kr(8)
3731 .sr(1)
3732 .m(m)
3733 .n(16)
3734 .k(8)
3735 .iterations(1)
3736 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3737 }
3738 }
3739
3740 TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile_n) {
3741 TEST_REQUIRES_X86_AVX512SKX;
3742 for (uint32_t n = 1; n <= 16; n++) {
3743 GemmMicrokernelTester()
3744 .mr(3)
3745 .nr(16)
3746 .kr(8)
3747 .sr(1)
3748 .m(3)
3749 .n(n)
3750 .k(8)
3751 .iterations(1)
3752 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3753 }
3754 }
3755
3756 TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_lt_8) {
3757 TEST_REQUIRES_X86_AVX512SKX;
3758 for (size_t k = 1; k < 8; k++) {
3759 GemmMicrokernelTester()
3760 .mr(3)
3761 .nr(16)
3762 .kr(8)
3763 .sr(1)
3764 .m(3)
3765 .n(16)
3766 .k(k)
3767 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3768 }
3769 }
3770
3771 TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_lt_8_strided_a) {
3772 TEST_REQUIRES_X86_AVX512SKX;
3773 for (size_t k = 1; k < 8; k++) {
3774 GemmMicrokernelTester()
3775 .mr(3)
3776 .nr(16)
3777 .kr(8)
3778 .sr(1)
3779 .m(3)
3780 .n(16)
3781 .k(k)
3782 .a_stride(11)
3783 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3784 }
3785 }
3786
3787 TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_lt_8_subtile) {
3788 TEST_REQUIRES_X86_AVX512SKX;
3789 for (size_t k = 1; k < 8; k++) {
3790 for (uint32_t m = 1; m <= 3; m++) {
3791 for (uint32_t n = 1; n <= 16; n++) {
3792 GemmMicrokernelTester()
3793 .mr(3)
3794 .nr(16)
3795 .kr(8)
3796 .sr(1)
3797 .m(m)
3798 .n(n)
3799 .k(k)
3800 .iterations(1)
3801 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3802 }
3803 }
3804 }
3805 }
3806
3807 TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_gt_8) {
3808 TEST_REQUIRES_X86_AVX512SKX;
3809 for (size_t k = 9; k < 16; k++) {
3810 GemmMicrokernelTester()
3811 .mr(3)
3812 .nr(16)
3813 .kr(8)
3814 .sr(1)
3815 .m(3)
3816 .n(16)
3817 .k(k)
3818 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3819 }
3820 }
3821
3822 TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_gt_8_strided_a) {
3823 TEST_REQUIRES_X86_AVX512SKX;
3824 for (size_t k = 9; k < 16; k++) {
3825 GemmMicrokernelTester()
3826 .mr(3)
3827 .nr(16)
3828 .kr(8)
3829 .sr(1)
3830 .m(3)
3831 .n(16)
3832 .k(k)
3833 .a_stride(19)
3834 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3835 }
3836 }
3837
3838 TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_gt_8_subtile) {
3839 TEST_REQUIRES_X86_AVX512SKX;
3840 for (size_t k = 9; k < 16; k++) {
3841 for (uint32_t m = 1; m <= 3; m++) {
3842 for (uint32_t n = 1; n <= 16; n++) {
3843 GemmMicrokernelTester()
3844 .mr(3)
3845 .nr(16)
3846 .kr(8)
3847 .sr(1)
3848 .m(m)
3849 .n(n)
3850 .k(k)
3851 .iterations(1)
3852 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3853 }
3854 }
3855 }
3856 }
3857
3858 TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_div_8) {
3859 TEST_REQUIRES_X86_AVX512SKX;
3860 for (size_t k = 16; k <= 80; k += 8) {
3861 GemmMicrokernelTester()
3862 .mr(3)
3863 .nr(16)
3864 .kr(8)
3865 .sr(1)
3866 .m(3)
3867 .n(16)
3868 .k(k)
3869 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3870 }
3871 }
3872
3873 TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_div_8_strided_a) {
3874 TEST_REQUIRES_X86_AVX512SKX;
3875 for (size_t k = 16; k <= 80; k += 8) {
3876 GemmMicrokernelTester()
3877 .mr(3)
3878 .nr(16)
3879 .kr(8)
3880 .sr(1)
3881 .m(3)
3882 .n(16)
3883 .k(k)
3884 .a_stride(83)
3885 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3886 }
3887 }
3888
3889 TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_div_8_subtile) {
3890 TEST_REQUIRES_X86_AVX512SKX;
3891 for (size_t k = 16; k <= 80; k += 8) {
3892 for (uint32_t m = 1; m <= 3; m++) {
3893 for (uint32_t n = 1; n <= 16; n++) {
3894 GemmMicrokernelTester()
3895 .mr(3)
3896 .nr(16)
3897 .kr(8)
3898 .sr(1)
3899 .m(m)
3900 .n(n)
3901 .k(k)
3902 .iterations(1)
3903 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3904 }
3905 }
3906 }
3907 }
3908
3909 TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16) {
3910 TEST_REQUIRES_X86_AVX512SKX;
3911 for (uint32_t n = 17; n < 32; n++) {
3912 for (size_t k = 1; k <= 40; k += 9) {
3913 GemmMicrokernelTester()
3914 .mr(3)
3915 .nr(16)
3916 .kr(8)
3917 .sr(1)
3918 .m(3)
3919 .n(16)
3920 .k(k)
3921 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3922 }
3923 }
3924 }
3925
3926 TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_strided_cn) {
3927 TEST_REQUIRES_X86_AVX512SKX;
3928 for (uint32_t n = 17; n < 32; n++) {
3929 for (size_t k = 1; k <= 40; k += 9) {
3930 GemmMicrokernelTester()
3931 .mr(3)
3932 .nr(16)
3933 .kr(8)
3934 .sr(1)
3935 .m(3)
3936 .n(16)
3937 .k(k)
3938 .cn_stride(19)
3939 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3940 }
3941 }
3942 }
3943
3944 TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_strided_a) {
3945 TEST_REQUIRES_X86_AVX512SKX;
3946 for (uint32_t n = 17; n < 32; n++) {
3947 for (size_t k = 1; k <= 40; k += 9) {
3948 GemmMicrokernelTester()
3949 .mr(3)
3950 .nr(16)
3951 .kr(8)
3952 .sr(1)
3953 .m(3)
3954 .n(n)
3955 .k(k)
3956 .a_stride(43)
3957 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3958 }
3959 }
3960 }
3961
3962 TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_subtile) {
3963 TEST_REQUIRES_X86_AVX512SKX;
3964 for (uint32_t n = 17; n < 32; n++) {
3965 for (size_t k = 1; k <= 40; k += 9) {
3966 for (uint32_t m = 1; m <= 3; m++) {
3967 GemmMicrokernelTester()
3968 .mr(3)
3969 .nr(16)
3970 .kr(8)
3971 .sr(1)
3972 .m(m)
3973 .n(n)
3974 .k(k)
3975 .iterations(1)
3976 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3977 }
3978 }
3979 }
3980 }
3981
3982 TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16) {
3983 TEST_REQUIRES_X86_AVX512SKX;
3984 for (uint32_t n = 32; n <= 48; n += 16) {
3985 for (size_t k = 1; k <= 40; k += 9) {
3986 GemmMicrokernelTester()
3987 .mr(3)
3988 .nr(16)
3989 .kr(8)
3990 .sr(1)
3991 .m(3)
3992 .n(16)
3993 .k(k)
3994 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
3995 }
3996 }
3997 }
3998
3999 TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_strided_cn) {
4000 TEST_REQUIRES_X86_AVX512SKX;
4001 for (uint32_t n = 32; n <= 48; n += 16) {
4002 for (size_t k = 1; k <= 40; k += 9) {
4003 GemmMicrokernelTester()
4004 .mr(3)
4005 .nr(16)
4006 .kr(8)
4007 .sr(1)
4008 .m(3)
4009 .n(n)
4010 .k(k)
4011 .cn_stride(19)
4012 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
4013 }
4014 }
4015 }
4016
4017 TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_strided_a) {
4018 TEST_REQUIRES_X86_AVX512SKX;
4019 for (uint32_t n = 32; n <= 48; n += 16) {
4020 for (size_t k = 1; k <= 40; k += 9) {
4021 GemmMicrokernelTester()
4022 .mr(3)
4023 .nr(16)
4024 .kr(8)
4025 .sr(1)
4026 .m(3)
4027 .n(n)
4028 .k(k)
4029 .a_stride(43)
4030 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
4031 }
4032 }
4033 }
4034
4035 TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_subtile) {
4036 TEST_REQUIRES_X86_AVX512SKX;
4037 for (uint32_t n = 32; n <= 48; n += 16) {
4038 for (size_t k = 1; k <= 40; k += 9) {
4039 for (uint32_t m = 1; m <= 3; m++) {
4040 GemmMicrokernelTester()
4041 .mr(3)
4042 .nr(16)
4043 .kr(8)
4044 .sr(1)
4045 .m(m)
4046 .n(n)
4047 .k(k)
4048 .iterations(1)
4049 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
4050 }
4051 }
4052 }
4053 }
4054
4055 TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cm_subtile) {
4056 TEST_REQUIRES_X86_AVX512SKX;
4057 for (size_t k = 1; k <= 40; k += 9) {
4058 for (uint32_t m = 1; m <= 3; m++) {
4059 for (uint32_t n = 1; n <= 16; n++) {
4060 GemmMicrokernelTester()
4061 .mr(3)
4062 .nr(16)
4063 .kr(8)
4064 .sr(1)
4065 .m(m)
4066 .n(n)
4067 .k(k)
4068 .cm_stride(19)
4069 .iterations(1)
4070 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
4071 }
4072 }
4073 }
4074 }
4075
4076 TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, qmin) {
4077 TEST_REQUIRES_X86_AVX512SKX;
4078 GemmMicrokernelTester()
4079 .mr(3)
4080 .nr(16)
4081 .kr(8)
4082 .sr(1)
4083 .m(3)
4084 .n(16)
4085 .k(8)
4086 .qmin(128)
4087 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
4088 }
4089
4090 TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, qmax) {
4091 TEST_REQUIRES_X86_AVX512SKX;
4092 GemmMicrokernelTester()
4093 .mr(3)
4094 .nr(16)
4095 .kr(8)
4096 .sr(1)
4097 .m(3)
4098 .n(16)
4099 .k(8)
4100 .qmax(128)
4101 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
4102 }
4103
4104 TEST(QC8_GEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cm) {
4105 TEST_REQUIRES_X86_AVX512SKX;
4106 GemmMicrokernelTester()
4107 .mr(3)
4108 .nr(16)
4109 .kr(8)
4110 .sr(1)
4111 .m(3)
4112 .n(16)
4113 .k(8)
4114 .cm_stride(19)
4115 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
4116 }
4117#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4118
4119
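// QC8 GEMM, 4x16 tile, AVX512-SKX: MR=4, subtile loops cover m in 1..4.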
4120#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8) {
    TEST_REQUIRES_X86_AVX512SKX;
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(4)
      .n(16)
      .k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
  }

  TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, strided_cn) {
    TEST_REQUIRES_X86_AVX512SKX;
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(4)
      .n(16)
      .k(8)
      .cn_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
  }

  TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_strided_a) {
    TEST_REQUIRES_X86_AVX512SKX;
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(4)
      .n(16)
      .k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
  }

  TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_subtile) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t m = 1; m <= 4; m++) {
      for (uint32_t n = 1; n <= 16; n++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(8)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_subtile_m) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(m)
        .n(16)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
    }
  }

  TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_subtile_n) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t n = 1; n <= 16; n++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(4)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
    }
  }

  TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_lt_8) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t k = 1; k < 8; k++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
    }
  }

  TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_lt_8_strided_a) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t k = 1; k < 8; k++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
    }
  }

  TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_lt_8_subtile) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t k = 1; k < 8; k++) {
      for (uint32_t m = 1; m <= 4; m++) {
        for (uint32_t n = 1; n <= 16; n++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(16)
            .kr(8)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
        }
      }
    }
  }

  TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_gt_8) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t k = 9; k < 16; k++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
    }
  }

  TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_gt_8_strided_a) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t k = 9; k < 16; k++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
    }
  }

  TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_gt_8_subtile) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t k = 9; k < 16; k++) {
      for (uint32_t m = 1; m <= 4; m++) {
        for (uint32_t n = 1; n <= 16; n++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(16)
            .kr(8)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
        }
      }
    }
  }

  TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_div_8) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t k = 16; k <= 80; k += 8) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
    }
  }

  TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_div_8_strided_a) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t k = 16; k <= 80; k += 8) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
    }
  }

  TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_div_8_subtile) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t k = 16; k <= 80; k += 8) {
      for (uint32_t m = 1; m <= 4; m++) {
        for (uint32_t n = 1; n <= 16; n++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(16)
            .kr(8)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
        }
      }
    }
  }

  TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t n = 17; n < 32; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(4)
          .n(16)
          .k(k)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16_strided_cn) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t n = 17; n < 32; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(4)
          .n(16)
          .k(k)
          .cn_stride(19)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16_strided_a) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t n = 17; n < 32; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16_subtile) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t n = 17; n < 32; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(16)
            .kr(8)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
        }
      }
    }
  }

  TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t n = 32; n <= 48; n += 16) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(4)
          .n(16)
          .k(k)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16_strided_cn) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t n = 32; n <= 48; n += 16) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .cn_stride(19)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16_strided_a) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t n = 32; n <= 48; n += 16) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
      }
    }
  }

  TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16_subtile) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (uint32_t n = 32; n <= 48; n += 16) {
      for (size_t k = 1; k <= 40; k += 9) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(16)
            .kr(8)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
        }
      }
    }
  }

  TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, strided_cm_subtile) {
    TEST_REQUIRES_X86_AVX512SKX;
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        for (uint32_t n = 1; n <= 16; n++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(16)
            .kr(8)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
        }
      }
    }
  }

  TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, qmin) {
    TEST_REQUIRES_X86_AVX512SKX;
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(4)
      .n(16)
      .k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
  }

  TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, qmax) {
    TEST_REQUIRES_X86_AVX512SKX;
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(4)
      .n(16)
      .k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
  }

  TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, strided_cm) {
    TEST_REQUIRES_X86_AVX512SKX;
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(4)
      .n(16)
      .k(8)
      .cm_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_minmax_avx512_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64