blob: 7f8b7e24dd090fffaf745334ea49460b96df7611 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/qs8-igemm-minmax-fp32.yaml
// Generator: tools/generate-gemm-test.py
12
13
14#include <gtest/gtest.h>
15
Frank Barchard447aa7b2021-12-28 14:11:40 -080016#include <xnnpack/allocator.h>
Marat Dukhan9b474cf2021-05-25 16:37:48 -070017#include <xnnpack/common.h>
18#include <xnnpack/isa-checks.h>
19
20#include <xnnpack/gemm.h>
21#include <xnnpack/igemm.h>
22#include <xnnpack/ppmm.h>
23#include "gemm-microkernel-tester.h"
24
25
Frank Barchard287952a2021-11-03 15:26:45 -070026#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard15eec022021-11-17 13:26:20 -080027 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_eq_16) {
28 TEST_REQUIRES_ARM_NEON;
29 GemmMicrokernelTester()
30 .mr(2)
31 .nr(8)
32 .kr(2)
33 .sr(1)
34 .m(2)
35 .n(8)
36 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080037 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -080038 }
39
40 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, strided_cn) {
41 TEST_REQUIRES_ARM_NEON;
42 GemmMicrokernelTester()
43 .mr(2)
44 .nr(8)
45 .kr(2)
46 .sr(1)
47 .m(2)
48 .n(8)
49 .k(16)
50 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080051 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -080052 }
53
54 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile) {
55 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080056 for (uint32_t n = 1; n <= 8; n++) {
57 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -080058 GemmMicrokernelTester()
59 .mr(2)
60 .nr(8)
61 .kr(2)
62 .sr(1)
63 .m(m)
64 .n(n)
65 .k(16)
66 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080067 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -080068 }
69 }
70 }
71
72 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
73 TEST_REQUIRES_ARM_NEON;
74 for (uint32_t m = 1; m <= 2; m++) {
75 GemmMicrokernelTester()
76 .mr(2)
77 .nr(8)
78 .kr(2)
79 .sr(1)
80 .m(m)
81 .n(8)
82 .k(16)
83 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080084 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -080085 }
86 }
87
88 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
89 TEST_REQUIRES_ARM_NEON;
90 for (uint32_t n = 1; n <= 8; n++) {
91 GemmMicrokernelTester()
92 .mr(2)
93 .nr(8)
94 .kr(2)
95 .sr(1)
96 .m(2)
97 .n(n)
98 .k(16)
99 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800100 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800101 }
102 }
103
104 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_lt_16) {
105 TEST_REQUIRES_ARM_NEON;
106 for (size_t k = 1; k < 16; k++) {
107 GemmMicrokernelTester()
108 .mr(2)
109 .nr(8)
110 .kr(2)
111 .sr(1)
112 .m(2)
113 .n(8)
114 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800115 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800116 }
117 }
118
119 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_lt_16_subtile) {
120 TEST_REQUIRES_ARM_NEON;
121 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800122 for (uint32_t n = 1; n <= 8; n++) {
123 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -0800124 GemmMicrokernelTester()
125 .mr(2)
126 .nr(8)
127 .kr(2)
128 .sr(1)
129 .m(m)
130 .n(n)
131 .k(k)
132 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800133 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800134 }
135 }
136 }
137 }
138
139 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_gt_16) {
140 TEST_REQUIRES_ARM_NEON;
141 for (size_t k = 17; k < 32; k++) {
142 GemmMicrokernelTester()
143 .mr(2)
144 .nr(8)
145 .kr(2)
146 .sr(1)
147 .m(2)
148 .n(8)
149 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800150 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800151 }
152 }
153
154 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_gt_16_subtile) {
155 TEST_REQUIRES_ARM_NEON;
156 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800157 for (uint32_t n = 1; n <= 8; n++) {
158 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -0800159 GemmMicrokernelTester()
160 .mr(2)
161 .nr(8)
162 .kr(2)
163 .sr(1)
164 .m(m)
165 .n(n)
166 .k(k)
167 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800168 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800169 }
170 }
171 }
172 }
173
174 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_div_16) {
175 TEST_REQUIRES_ARM_NEON;
176 for (size_t k = 32; k <= 160; k += 16) {
177 GemmMicrokernelTester()
178 .mr(2)
179 .nr(8)
180 .kr(2)
181 .sr(1)
182 .m(2)
183 .n(8)
184 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800185 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800186 }
187 }
188
189 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_div_16_subtile) {
190 TEST_REQUIRES_ARM_NEON;
191 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800192 for (uint32_t n = 1; n <= 8; n++) {
193 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -0800194 GemmMicrokernelTester()
195 .mr(2)
196 .nr(8)
197 .kr(2)
198 .sr(1)
199 .m(m)
200 .n(n)
201 .k(k)
202 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800203 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800204 }
205 }
206 }
207 }
208
209 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_gt_8) {
210 TEST_REQUIRES_ARM_NEON;
211 for (uint32_t n = 9; n < 16; n++) {
212 for (size_t k = 1; k <= 80; k += 17) {
213 GemmMicrokernelTester()
214 .mr(2)
215 .nr(8)
216 .kr(2)
217 .sr(1)
218 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800219 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -0800220 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800221 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800222 }
223 }
224 }
225
226 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
227 TEST_REQUIRES_ARM_NEON;
228 for (uint32_t n = 9; n < 16; n++) {
229 for (size_t k = 1; k <= 80; k += 17) {
230 GemmMicrokernelTester()
231 .mr(2)
232 .nr(8)
233 .kr(2)
234 .sr(1)
235 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800236 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -0800237 .k(k)
238 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800239 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800240 }
241 }
242 }
243
244 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_gt_8_subtile) {
245 TEST_REQUIRES_ARM_NEON;
246 for (uint32_t n = 9; n < 16; n++) {
247 for (size_t k = 1; k <= 80; k += 17) {
248 for (uint32_t m = 1; m <= 2; m++) {
249 GemmMicrokernelTester()
250 .mr(2)
251 .nr(8)
252 .kr(2)
253 .sr(1)
254 .m(m)
255 .n(n)
256 .k(k)
257 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800258 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800259 }
260 }
261 }
262 }
263
264 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_div_8) {
265 TEST_REQUIRES_ARM_NEON;
266 for (uint32_t n = 16; n <= 24; n += 8) {
267 for (size_t k = 1; k <= 80; k += 17) {
268 GemmMicrokernelTester()
269 .mr(2)
270 .nr(8)
271 .kr(2)
272 .sr(1)
273 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800274 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -0800275 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800276 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800277 }
278 }
279 }
280
281 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_div_8_strided_cn) {
282 TEST_REQUIRES_ARM_NEON;
283 for (uint32_t n = 16; n <= 24; n += 8) {
284 for (size_t k = 1; k <= 80; k += 17) {
285 GemmMicrokernelTester()
286 .mr(2)
287 .nr(8)
288 .kr(2)
289 .sr(1)
290 .m(2)
291 .n(n)
292 .k(k)
293 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800294 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800295 }
296 }
297 }
298
299 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_div_8_subtile) {
300 TEST_REQUIRES_ARM_NEON;
301 for (uint32_t n = 16; n <= 24; n += 8) {
302 for (size_t k = 1; k <= 80; k += 17) {
303 for (uint32_t m = 1; m <= 2; m++) {
304 GemmMicrokernelTester()
305 .mr(2)
306 .nr(8)
307 .kr(2)
308 .sr(1)
309 .m(m)
310 .n(n)
311 .k(k)
312 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800313 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800314 }
315 }
316 }
317 }
318
319 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, small_kernel) {
320 TEST_REQUIRES_ARM_NEON;
321 for (size_t k = 1; k <= 80; k += 17) {
322 GemmMicrokernelTester()
323 .mr(2)
324 .nr(8)
325 .kr(2)
326 .sr(1)
327 .m(2)
328 .n(8)
329 .k(k)
330 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -0800331 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800332 }
333 }
334
335 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, small_kernel_subtile) {
336 TEST_REQUIRES_ARM_NEON;
337 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800338 for (uint32_t n = 1; n <= 8; n++) {
339 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -0800340 GemmMicrokernelTester()
341 .mr(2)
342 .nr(8)
343 .kr(2)
344 .sr(1)
345 .m(m)
346 .n(n)
347 .k(k)
348 .ks(3)
349 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800350 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800351 }
352 }
353 }
354 }
355
356 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
357 TEST_REQUIRES_ARM_NEON;
358 for (uint32_t n = 9; n < 16; n++) {
359 for (size_t k = 1; k <= 80; k += 17) {
360 GemmMicrokernelTester()
361 .mr(2)
362 .nr(8)
363 .kr(2)
364 .sr(1)
365 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800366 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -0800367 .k(k)
368 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -0800369 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800370 }
371 }
372 }
373
374 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_div_8_small_kernel) {
375 TEST_REQUIRES_ARM_NEON;
376 for (uint32_t n = 16; n <= 24; n += 8) {
377 for (size_t k = 1; k <= 80; k += 17) {
378 GemmMicrokernelTester()
379 .mr(2)
380 .nr(8)
381 .kr(2)
382 .sr(1)
383 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800384 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -0800385 .k(k)
386 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -0800387 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800388 }
389 }
390 }
391
392 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, strided_cm_subtile) {
393 TEST_REQUIRES_ARM_NEON;
394 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800395 for (uint32_t n = 1; n <= 8; n++) {
396 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -0800397 GemmMicrokernelTester()
398 .mr(2)
399 .nr(8)
400 .kr(2)
401 .sr(1)
402 .m(m)
403 .n(n)
404 .k(k)
405 .cm_stride(11)
406 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800407 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800408 }
409 }
410 }
411 }
412
413 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, a_offset) {
414 TEST_REQUIRES_ARM_NEON;
415 for (size_t k = 1; k <= 80; k += 17) {
416 GemmMicrokernelTester()
417 .mr(2)
418 .nr(8)
419 .kr(2)
420 .sr(1)
421 .m(2)
422 .n(8)
423 .k(k)
424 .ks(3)
425 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -0800426 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800427 }
428 }
429
430 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, zero) {
431 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800432 for (size_t k = 1; k <= 80; k += 17) {
433 for (uint32_t mz = 0; mz < 2; mz++) {
Frank Barchard15eec022021-11-17 13:26:20 -0800434 GemmMicrokernelTester()
435 .mr(2)
436 .nr(8)
437 .kr(2)
438 .sr(1)
439 .m(2)
440 .n(8)
441 .k(k)
442 .ks(3)
443 .a_offset(163)
444 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -0800445 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800446 }
447 }
448 }
449
450 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, qmin) {
451 TEST_REQUIRES_ARM_NEON;
452 GemmMicrokernelTester()
453 .mr(2)
454 .nr(8)
455 .kr(2)
456 .sr(1)
457 .m(2)
458 .n(8)
459 .k(16)
460 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800461 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800462 }
463
464 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, qmax) {
465 TEST_REQUIRES_ARM_NEON;
466 GemmMicrokernelTester()
467 .mr(2)
468 .nr(8)
469 .kr(2)
470 .sr(1)
471 .m(2)
472 .n(8)
473 .k(16)
474 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800475 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800476 }
477
478 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, strided_cm) {
479 TEST_REQUIRES_ARM_NEON;
480 GemmMicrokernelTester()
481 .mr(2)
482 .nr(8)
483 .kr(2)
484 .sr(1)
485 .m(2)
486 .n(8)
487 .k(16)
488 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800489 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800490 }
491#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
492
493
494#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard15eec022021-11-17 13:26:20 -0800495 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_eq_16) {
496 TEST_REQUIRES_ARM_NEON_V8;
497 GemmMicrokernelTester()
498 .mr(2)
499 .nr(8)
500 .kr(2)
501 .sr(1)
502 .m(2)
503 .n(8)
504 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -0800505 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800506 }
507
508 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, strided_cn) {
509 TEST_REQUIRES_ARM_NEON_V8;
510 GemmMicrokernelTester()
511 .mr(2)
512 .nr(8)
513 .kr(2)
514 .sr(1)
515 .m(2)
516 .n(8)
517 .k(16)
518 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800519 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800520 }
521
522 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_eq_16_subtile) {
523 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800524 for (uint32_t n = 1; n <= 8; n++) {
525 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -0800526 GemmMicrokernelTester()
527 .mr(2)
528 .nr(8)
529 .kr(2)
530 .sr(1)
531 .m(m)
532 .n(n)
533 .k(16)
534 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800535 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800536 }
537 }
538 }
539
540 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_eq_16_subtile_m) {
541 TEST_REQUIRES_ARM_NEON_V8;
542 for (uint32_t m = 1; m <= 2; m++) {
543 GemmMicrokernelTester()
544 .mr(2)
545 .nr(8)
546 .kr(2)
547 .sr(1)
548 .m(m)
549 .n(8)
550 .k(16)
551 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800552 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800553 }
554 }
555
556 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_eq_16_subtile_n) {
557 TEST_REQUIRES_ARM_NEON_V8;
558 for (uint32_t n = 1; n <= 8; n++) {
559 GemmMicrokernelTester()
560 .mr(2)
561 .nr(8)
562 .kr(2)
563 .sr(1)
564 .m(2)
565 .n(n)
566 .k(16)
567 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800568 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800569 }
570 }
571
572 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_lt_16) {
573 TEST_REQUIRES_ARM_NEON_V8;
574 for (size_t k = 1; k < 16; k++) {
575 GemmMicrokernelTester()
576 .mr(2)
577 .nr(8)
578 .kr(2)
579 .sr(1)
580 .m(2)
581 .n(8)
582 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800583 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800584 }
585 }
586
587 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_lt_16_subtile) {
588 TEST_REQUIRES_ARM_NEON_V8;
589 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800590 for (uint32_t n = 1; n <= 8; n++) {
591 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -0800592 GemmMicrokernelTester()
593 .mr(2)
594 .nr(8)
595 .kr(2)
596 .sr(1)
597 .m(m)
598 .n(n)
599 .k(k)
600 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800601 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800602 }
603 }
604 }
605 }
606
607 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_gt_16) {
608 TEST_REQUIRES_ARM_NEON_V8;
609 for (size_t k = 17; k < 32; k++) {
610 GemmMicrokernelTester()
611 .mr(2)
612 .nr(8)
613 .kr(2)
614 .sr(1)
615 .m(2)
616 .n(8)
617 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800618 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800619 }
620 }
621
622 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_gt_16_subtile) {
623 TEST_REQUIRES_ARM_NEON_V8;
624 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800625 for (uint32_t n = 1; n <= 8; n++) {
626 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -0800627 GemmMicrokernelTester()
628 .mr(2)
629 .nr(8)
630 .kr(2)
631 .sr(1)
632 .m(m)
633 .n(n)
634 .k(k)
635 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800636 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800637 }
638 }
639 }
640 }
641
642 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_div_16) {
643 TEST_REQUIRES_ARM_NEON_V8;
644 for (size_t k = 32; k <= 160; k += 16) {
645 GemmMicrokernelTester()
646 .mr(2)
647 .nr(8)
648 .kr(2)
649 .sr(1)
650 .m(2)
651 .n(8)
652 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800653 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800654 }
655 }
656
657 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_div_16_subtile) {
658 TEST_REQUIRES_ARM_NEON_V8;
659 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800660 for (uint32_t n = 1; n <= 8; n++) {
661 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -0800662 GemmMicrokernelTester()
663 .mr(2)
664 .nr(8)
665 .kr(2)
666 .sr(1)
667 .m(m)
668 .n(n)
669 .k(k)
670 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800671 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800672 }
673 }
674 }
675 }
676
677 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_gt_8) {
678 TEST_REQUIRES_ARM_NEON_V8;
679 for (uint32_t n = 9; n < 16; n++) {
680 for (size_t k = 1; k <= 80; k += 17) {
681 GemmMicrokernelTester()
682 .mr(2)
683 .nr(8)
684 .kr(2)
685 .sr(1)
686 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800687 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -0800688 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800689 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800690 }
691 }
692 }
693
694 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_gt_8_strided_cn) {
695 TEST_REQUIRES_ARM_NEON_V8;
696 for (uint32_t n = 9; n < 16; n++) {
697 for (size_t k = 1; k <= 80; k += 17) {
698 GemmMicrokernelTester()
699 .mr(2)
700 .nr(8)
701 .kr(2)
702 .sr(1)
703 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800704 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -0800705 .k(k)
706 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800707 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800708 }
709 }
710 }
711
712 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_gt_8_subtile) {
713 TEST_REQUIRES_ARM_NEON_V8;
714 for (uint32_t n = 9; n < 16; n++) {
715 for (size_t k = 1; k <= 80; k += 17) {
716 for (uint32_t m = 1; m <= 2; m++) {
717 GemmMicrokernelTester()
718 .mr(2)
719 .nr(8)
720 .kr(2)
721 .sr(1)
722 .m(m)
723 .n(n)
724 .k(k)
725 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800726 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800727 }
728 }
729 }
730 }
731
732 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_div_8) {
733 TEST_REQUIRES_ARM_NEON_V8;
734 for (uint32_t n = 16; n <= 24; n += 8) {
735 for (size_t k = 1; k <= 80; k += 17) {
736 GemmMicrokernelTester()
737 .mr(2)
738 .nr(8)
739 .kr(2)
740 .sr(1)
741 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800742 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -0800743 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800744 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800745 }
746 }
747 }
748
749 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_div_8_strided_cn) {
750 TEST_REQUIRES_ARM_NEON_V8;
751 for (uint32_t n = 16; n <= 24; n += 8) {
752 for (size_t k = 1; k <= 80; k += 17) {
753 GemmMicrokernelTester()
754 .mr(2)
755 .nr(8)
756 .kr(2)
757 .sr(1)
758 .m(2)
759 .n(n)
760 .k(k)
761 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800762 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800763 }
764 }
765 }
766
767 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_div_8_subtile) {
768 TEST_REQUIRES_ARM_NEON_V8;
769 for (uint32_t n = 16; n <= 24; n += 8) {
770 for (size_t k = 1; k <= 80; k += 17) {
771 for (uint32_t m = 1; m <= 2; m++) {
772 GemmMicrokernelTester()
773 .mr(2)
774 .nr(8)
775 .kr(2)
776 .sr(1)
777 .m(m)
778 .n(n)
779 .k(k)
780 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800781 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800782 }
783 }
784 }
785 }
786
787 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, small_kernel) {
788 TEST_REQUIRES_ARM_NEON_V8;
789 for (size_t k = 1; k <= 80; k += 17) {
790 GemmMicrokernelTester()
791 .mr(2)
792 .nr(8)
793 .kr(2)
794 .sr(1)
795 .m(2)
796 .n(8)
797 .k(k)
798 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -0800799 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800800 }
801 }
802
803 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, small_kernel_subtile) {
804 TEST_REQUIRES_ARM_NEON_V8;
805 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800806 for (uint32_t n = 1; n <= 8; n++) {
807 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -0800808 GemmMicrokernelTester()
809 .mr(2)
810 .nr(8)
811 .kr(2)
812 .sr(1)
813 .m(m)
814 .n(n)
815 .k(k)
816 .ks(3)
817 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800818 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800819 }
820 }
821 }
822 }
823
824 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_gt_8_small_kernel) {
825 TEST_REQUIRES_ARM_NEON_V8;
826 for (uint32_t n = 9; n < 16; n++) {
827 for (size_t k = 1; k <= 80; k += 17) {
828 GemmMicrokernelTester()
829 .mr(2)
830 .nr(8)
831 .kr(2)
832 .sr(1)
833 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800834 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -0800835 .k(k)
836 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -0800837 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800838 }
839 }
840 }
841
842 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_div_8_small_kernel) {
843 TEST_REQUIRES_ARM_NEON_V8;
844 for (uint32_t n = 16; n <= 24; n += 8) {
845 for (size_t k = 1; k <= 80; k += 17) {
846 GemmMicrokernelTester()
847 .mr(2)
848 .nr(8)
849 .kr(2)
850 .sr(1)
851 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800852 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -0800853 .k(k)
854 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -0800855 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800856 }
857 }
858 }
859
860 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, strided_cm_subtile) {
861 TEST_REQUIRES_ARM_NEON_V8;
862 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800863 for (uint32_t n = 1; n <= 8; n++) {
864 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -0800865 GemmMicrokernelTester()
866 .mr(2)
867 .nr(8)
868 .kr(2)
869 .sr(1)
870 .m(m)
871 .n(n)
872 .k(k)
873 .cm_stride(11)
874 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800875 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800876 }
877 }
878 }
879 }
880
881 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, a_offset) {
882 TEST_REQUIRES_ARM_NEON_V8;
883 for (size_t k = 1; k <= 80; k += 17) {
884 GemmMicrokernelTester()
885 .mr(2)
886 .nr(8)
887 .kr(2)
888 .sr(1)
889 .m(2)
890 .n(8)
891 .k(k)
892 .ks(3)
893 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -0800894 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800895 }
896 }
897
898 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, zero) {
899 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800900 for (size_t k = 1; k <= 80; k += 17) {
901 for (uint32_t mz = 0; mz < 2; mz++) {
Frank Barchard15eec022021-11-17 13:26:20 -0800902 GemmMicrokernelTester()
903 .mr(2)
904 .nr(8)
905 .kr(2)
906 .sr(1)
907 .m(2)
908 .n(8)
909 .k(k)
910 .ks(3)
911 .a_offset(163)
912 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -0800913 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800914 }
915 }
916 }
917
918 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, qmin) {
919 TEST_REQUIRES_ARM_NEON_V8;
920 GemmMicrokernelTester()
921 .mr(2)
922 .nr(8)
923 .kr(2)
924 .sr(1)
925 .m(2)
926 .n(8)
927 .k(16)
928 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800929 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800930 }
931
932 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, qmax) {
933 TEST_REQUIRES_ARM_NEON_V8;
934 GemmMicrokernelTester()
935 .mr(2)
936 .nr(8)
937 .kr(2)
938 .sr(1)
939 .m(2)
940 .n(8)
941 .k(16)
942 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800943 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800944 }
945
946 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, strided_cm) {
947 TEST_REQUIRES_ARM_NEON_V8;
948 GemmMicrokernelTester()
949 .mr(2)
950 .nr(8)
951 .kr(2)
952 .sr(1)
953 .m(2)
954 .n(8)
955 .k(16)
956 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800957 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800958 }
959#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
960
961
962#if XNN_ARCH_ARM || XNN_ARCH_ARM64
963 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16) {
964 TEST_REQUIRES_ARM_NEON;
965 GemmMicrokernelTester()
966 .mr(1)
967 .nr(8)
968 .kr(2)
969 .sr(1)
970 .m(1)
971 .n(8)
972 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -0800973 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800974 }
975
976 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, strided_cn) {
977 TEST_REQUIRES_ARM_NEON;
978 GemmMicrokernelTester()
979 .mr(1)
980 .nr(8)
981 .kr(2)
982 .sr(1)
983 .m(1)
984 .n(8)
985 .k(16)
986 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800987 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800988 }
989
990 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile) {
991 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800992 for (uint32_t n = 1; n <= 8; n++) {
993 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -0800994 GemmMicrokernelTester()
995 .mr(1)
996 .nr(8)
997 .kr(2)
998 .sr(1)
999 .m(m)
1000 .n(n)
1001 .k(16)
1002 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001003 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001004 }
1005 }
1006 }
1007
1008 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
1009 TEST_REQUIRES_ARM_NEON;
1010 for (uint32_t m = 1; m <= 1; m++) {
1011 GemmMicrokernelTester()
1012 .mr(1)
1013 .nr(8)
1014 .kr(2)
1015 .sr(1)
1016 .m(m)
1017 .n(8)
1018 .k(16)
1019 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001020 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001021 }
1022 }
1023
1024 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
1025 TEST_REQUIRES_ARM_NEON;
1026 for (uint32_t n = 1; n <= 8; n++) {
1027 GemmMicrokernelTester()
1028 .mr(1)
1029 .nr(8)
1030 .kr(2)
1031 .sr(1)
1032 .m(1)
1033 .n(n)
1034 .k(16)
1035 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001036 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001037 }
1038 }
1039
1040 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_lt_16) {
1041 TEST_REQUIRES_ARM_NEON;
1042 for (size_t k = 1; k < 16; k++) {
1043 GemmMicrokernelTester()
1044 .mr(1)
1045 .nr(8)
1046 .kr(2)
1047 .sr(1)
1048 .m(1)
1049 .n(8)
1050 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001051 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001052 }
1053 }
1054
1055 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_lt_16_subtile) {
1056 TEST_REQUIRES_ARM_NEON;
1057 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001058 for (uint32_t n = 1; n <= 8; n++) {
1059 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -08001060 GemmMicrokernelTester()
1061 .mr(1)
1062 .nr(8)
1063 .kr(2)
1064 .sr(1)
1065 .m(m)
1066 .n(n)
1067 .k(k)
1068 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001069 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001070 }
1071 }
1072 }
1073 }
1074
1075 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_gt_16) {
1076 TEST_REQUIRES_ARM_NEON;
1077 for (size_t k = 17; k < 32; k++) {
1078 GemmMicrokernelTester()
1079 .mr(1)
1080 .nr(8)
1081 .kr(2)
1082 .sr(1)
1083 .m(1)
1084 .n(8)
1085 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001086 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001087 }
1088 }
1089
1090 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_gt_16_subtile) {
1091 TEST_REQUIRES_ARM_NEON;
1092 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001093 for (uint32_t n = 1; n <= 8; n++) {
1094 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -08001095 GemmMicrokernelTester()
1096 .mr(1)
1097 .nr(8)
1098 .kr(2)
1099 .sr(1)
1100 .m(m)
1101 .n(n)
1102 .k(k)
1103 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001104 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001105 }
1106 }
1107 }
1108 }
1109
1110 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_div_16) {
1111 TEST_REQUIRES_ARM_NEON;
1112 for (size_t k = 32; k <= 160; k += 16) {
1113 GemmMicrokernelTester()
1114 .mr(1)
1115 .nr(8)
1116 .kr(2)
1117 .sr(1)
1118 .m(1)
1119 .n(8)
1120 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001121 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001122 }
1123 }
1124
1125 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_div_16_subtile) {
1126 TEST_REQUIRES_ARM_NEON;
1127 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001128 for (uint32_t n = 1; n <= 8; n++) {
1129 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -08001130 GemmMicrokernelTester()
1131 .mr(1)
1132 .nr(8)
1133 .kr(2)
1134 .sr(1)
1135 .m(m)
1136 .n(n)
1137 .k(k)
1138 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001139 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001140 }
1141 }
1142 }
1143 }
1144
1145 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8) {
1146 TEST_REQUIRES_ARM_NEON;
1147 for (uint32_t n = 9; n < 16; n++) {
1148 for (size_t k = 1; k <= 80; k += 17) {
1149 GemmMicrokernelTester()
1150 .mr(1)
1151 .nr(8)
1152 .kr(2)
1153 .sr(1)
1154 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001155 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -08001156 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001157 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001158 }
1159 }
1160 }
1161
1162 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
1163 TEST_REQUIRES_ARM_NEON;
1164 for (uint32_t n = 9; n < 16; n++) {
1165 for (size_t k = 1; k <= 80; k += 17) {
1166 GemmMicrokernelTester()
1167 .mr(1)
1168 .nr(8)
1169 .kr(2)
1170 .sr(1)
1171 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001172 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -08001173 .k(k)
1174 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001175 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001176 }
1177 }
1178 }
1179
1180 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8_subtile) {
1181 TEST_REQUIRES_ARM_NEON;
1182 for (uint32_t n = 9; n < 16; n++) {
1183 for (size_t k = 1; k <= 80; k += 17) {
1184 for (uint32_t m = 1; m <= 1; m++) {
1185 GemmMicrokernelTester()
1186 .mr(1)
1187 .nr(8)
1188 .kr(2)
1189 .sr(1)
1190 .m(m)
1191 .n(n)
1192 .k(k)
1193 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001194 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001195 }
1196 }
1197 }
1198 }
1199
1200 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8) {
1201 TEST_REQUIRES_ARM_NEON;
1202 for (uint32_t n = 16; n <= 24; n += 8) {
1203 for (size_t k = 1; k <= 80; k += 17) {
1204 GemmMicrokernelTester()
1205 .mr(1)
1206 .nr(8)
1207 .kr(2)
1208 .sr(1)
1209 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001210 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -08001211 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001212 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001213 }
1214 }
1215 }
1216
1217 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8_strided_cn) {
1218 TEST_REQUIRES_ARM_NEON;
1219 for (uint32_t n = 16; n <= 24; n += 8) {
1220 for (size_t k = 1; k <= 80; k += 17) {
1221 GemmMicrokernelTester()
1222 .mr(1)
1223 .nr(8)
1224 .kr(2)
1225 .sr(1)
1226 .m(1)
1227 .n(n)
1228 .k(k)
1229 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001230 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001231 }
1232 }
1233 }
1234
1235 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8_subtile) {
1236 TEST_REQUIRES_ARM_NEON;
1237 for (uint32_t n = 16; n <= 24; n += 8) {
1238 for (size_t k = 1; k <= 80; k += 17) {
1239 for (uint32_t m = 1; m <= 1; m++) {
1240 GemmMicrokernelTester()
1241 .mr(1)
1242 .nr(8)
1243 .kr(2)
1244 .sr(1)
1245 .m(m)
1246 .n(n)
1247 .k(k)
1248 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001249 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001250 }
1251 }
1252 }
1253 }
1254
1255 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, small_kernel) {
1256 TEST_REQUIRES_ARM_NEON;
1257 for (size_t k = 1; k <= 80; k += 17) {
1258 GemmMicrokernelTester()
1259 .mr(1)
1260 .nr(8)
1261 .kr(2)
1262 .sr(1)
1263 .m(1)
1264 .n(8)
1265 .k(k)
1266 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001267 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001268 }
1269 }
1270
1271 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, small_kernel_subtile) {
1272 TEST_REQUIRES_ARM_NEON;
1273 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001274 for (uint32_t n = 1; n <= 8; n++) {
1275 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -08001276 GemmMicrokernelTester()
1277 .mr(1)
1278 .nr(8)
1279 .kr(2)
1280 .sr(1)
1281 .m(m)
1282 .n(n)
1283 .k(k)
1284 .ks(3)
1285 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001286 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001287 }
1288 }
1289 }
1290 }
1291
1292 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8_small_kernel) {
1293 TEST_REQUIRES_ARM_NEON;
1294 for (uint32_t n = 9; n < 16; n++) {
1295 for (size_t k = 1; k <= 80; k += 17) {
1296 GemmMicrokernelTester()
1297 .mr(1)
1298 .nr(8)
1299 .kr(2)
1300 .sr(1)
1301 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001302 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -08001303 .k(k)
1304 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001305 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001306 }
1307 }
1308 }
1309
1310 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8_small_kernel) {
1311 TEST_REQUIRES_ARM_NEON;
1312 for (uint32_t n = 16; n <= 24; n += 8) {
1313 for (size_t k = 1; k <= 80; k += 17) {
1314 GemmMicrokernelTester()
1315 .mr(1)
1316 .nr(8)
1317 .kr(2)
1318 .sr(1)
1319 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001320 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -08001321 .k(k)
1322 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001323 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001324 }
1325 }
1326 }
1327
1328 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, strided_cm_subtile) {
1329 TEST_REQUIRES_ARM_NEON;
1330 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001331 for (uint32_t n = 1; n <= 8; n++) {
1332 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -08001333 GemmMicrokernelTester()
1334 .mr(1)
1335 .nr(8)
1336 .kr(2)
1337 .sr(1)
1338 .m(m)
1339 .n(n)
1340 .k(k)
1341 .cm_stride(11)
1342 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001343 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001344 }
1345 }
1346 }
1347 }
1348
1349 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, a_offset) {
1350 TEST_REQUIRES_ARM_NEON;
1351 for (size_t k = 1; k <= 80; k += 17) {
1352 GemmMicrokernelTester()
1353 .mr(1)
1354 .nr(8)
1355 .kr(2)
1356 .sr(1)
1357 .m(1)
1358 .n(8)
1359 .k(k)
1360 .ks(3)
1361 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08001362 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001363 }
1364 }
1365
1366 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, zero) {
1367 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001368 for (size_t k = 1; k <= 80; k += 17) {
1369 for (uint32_t mz = 0; mz < 1; mz++) {
Frank Barchard15eec022021-11-17 13:26:20 -08001370 GemmMicrokernelTester()
1371 .mr(1)
1372 .nr(8)
1373 .kr(2)
1374 .sr(1)
1375 .m(1)
1376 .n(8)
1377 .k(k)
1378 .ks(3)
1379 .a_offset(83)
1380 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08001381 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001382 }
1383 }
1384 }
1385
1386 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, qmin) {
1387 TEST_REQUIRES_ARM_NEON;
1388 GemmMicrokernelTester()
1389 .mr(1)
1390 .nr(8)
1391 .kr(2)
1392 .sr(1)
1393 .m(1)
1394 .n(8)
1395 .k(16)
1396 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001397 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001398 }
1399
1400 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, qmax) {
1401 TEST_REQUIRES_ARM_NEON;
1402 GemmMicrokernelTester()
1403 .mr(1)
1404 .nr(8)
1405 .kr(2)
1406 .sr(1)
1407 .m(1)
1408 .n(8)
1409 .k(16)
1410 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001411 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001412 }
1413
1414 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, strided_cm) {
1415 TEST_REQUIRES_ARM_NEON;
1416 GemmMicrokernelTester()
1417 .mr(1)
1418 .nr(8)
1419 .kr(2)
1420 .sr(1)
1421 .m(1)
1422 .n(8)
1423 .k(16)
1424 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001425 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001426 }
1427#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1428
1429
1430#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard15eec022021-11-17 13:26:20 -08001431 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_eq_16) {
1432 TEST_REQUIRES_ARM_NEON_V8;
1433 GemmMicrokernelTester()
1434 .mr(1)
1435 .nr(8)
1436 .kr(2)
1437 .sr(1)
1438 .m(1)
1439 .n(8)
1440 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08001441 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001442 }
1443
1444 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, strided_cn) {
1445 TEST_REQUIRES_ARM_NEON_V8;
1446 GemmMicrokernelTester()
1447 .mr(1)
1448 .nr(8)
1449 .kr(2)
1450 .sr(1)
1451 .m(1)
1452 .n(8)
1453 .k(16)
1454 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001455 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001456 }
1457
1458 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_eq_16_subtile) {
1459 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001460 for (uint32_t n = 1; n <= 8; n++) {
1461 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -08001462 GemmMicrokernelTester()
1463 .mr(1)
1464 .nr(8)
1465 .kr(2)
1466 .sr(1)
1467 .m(m)
1468 .n(n)
1469 .k(16)
1470 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001471 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001472 }
1473 }
1474 }
1475
1476 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_eq_16_subtile_m) {
1477 TEST_REQUIRES_ARM_NEON_V8;
1478 for (uint32_t m = 1; m <= 1; m++) {
1479 GemmMicrokernelTester()
1480 .mr(1)
1481 .nr(8)
1482 .kr(2)
1483 .sr(1)
1484 .m(m)
1485 .n(8)
1486 .k(16)
1487 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001488 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001489 }
1490 }
1491
1492 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_eq_16_subtile_n) {
1493 TEST_REQUIRES_ARM_NEON_V8;
1494 for (uint32_t n = 1; n <= 8; n++) {
1495 GemmMicrokernelTester()
1496 .mr(1)
1497 .nr(8)
1498 .kr(2)
1499 .sr(1)
1500 .m(1)
1501 .n(n)
1502 .k(16)
1503 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001504 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001505 }
1506 }
1507
1508 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_lt_16) {
1509 TEST_REQUIRES_ARM_NEON_V8;
1510 for (size_t k = 1; k < 16; k++) {
1511 GemmMicrokernelTester()
1512 .mr(1)
1513 .nr(8)
1514 .kr(2)
1515 .sr(1)
1516 .m(1)
1517 .n(8)
1518 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001519 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001520 }
1521 }
1522
1523 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_lt_16_subtile) {
1524 TEST_REQUIRES_ARM_NEON_V8;
1525 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001526 for (uint32_t n = 1; n <= 8; n++) {
1527 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -08001528 GemmMicrokernelTester()
1529 .mr(1)
1530 .nr(8)
1531 .kr(2)
1532 .sr(1)
1533 .m(m)
1534 .n(n)
1535 .k(k)
1536 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001537 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001538 }
1539 }
1540 }
1541 }
1542
1543 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_gt_16) {
1544 TEST_REQUIRES_ARM_NEON_V8;
1545 for (size_t k = 17; k < 32; k++) {
1546 GemmMicrokernelTester()
1547 .mr(1)
1548 .nr(8)
1549 .kr(2)
1550 .sr(1)
1551 .m(1)
1552 .n(8)
1553 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001554 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001555 }
1556 }
1557
1558 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_gt_16_subtile) {
1559 TEST_REQUIRES_ARM_NEON_V8;
1560 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001561 for (uint32_t n = 1; n <= 8; n++) {
1562 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -08001563 GemmMicrokernelTester()
1564 .mr(1)
1565 .nr(8)
1566 .kr(2)
1567 .sr(1)
1568 .m(m)
1569 .n(n)
1570 .k(k)
1571 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001572 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001573 }
1574 }
1575 }
1576 }
1577
1578 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_div_16) {
1579 TEST_REQUIRES_ARM_NEON_V8;
1580 for (size_t k = 32; k <= 160; k += 16) {
1581 GemmMicrokernelTester()
1582 .mr(1)
1583 .nr(8)
1584 .kr(2)
1585 .sr(1)
1586 .m(1)
1587 .n(8)
1588 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001589 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001590 }
1591 }
1592
1593 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_div_16_subtile) {
1594 TEST_REQUIRES_ARM_NEON_V8;
1595 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001596 for (uint32_t n = 1; n <= 8; n++) {
1597 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -08001598 GemmMicrokernelTester()
1599 .mr(1)
1600 .nr(8)
1601 .kr(2)
1602 .sr(1)
1603 .m(m)
1604 .n(n)
1605 .k(k)
1606 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001607 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001608 }
1609 }
1610 }
1611 }
1612
1613 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_gt_8) {
1614 TEST_REQUIRES_ARM_NEON_V8;
1615 for (uint32_t n = 9; n < 16; n++) {
1616 for (size_t k = 1; k <= 80; k += 17) {
1617 GemmMicrokernelTester()
1618 .mr(1)
1619 .nr(8)
1620 .kr(2)
1621 .sr(1)
1622 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001623 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -08001624 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001625 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001626 }
1627 }
1628 }
1629
1630 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_gt_8_strided_cn) {
1631 TEST_REQUIRES_ARM_NEON_V8;
1632 for (uint32_t n = 9; n < 16; n++) {
1633 for (size_t k = 1; k <= 80; k += 17) {
1634 GemmMicrokernelTester()
1635 .mr(1)
1636 .nr(8)
1637 .kr(2)
1638 .sr(1)
1639 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001640 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -08001641 .k(k)
1642 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001643 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001644 }
1645 }
1646 }
1647
1648 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_gt_8_subtile) {
1649 TEST_REQUIRES_ARM_NEON_V8;
1650 for (uint32_t n = 9; n < 16; n++) {
1651 for (size_t k = 1; k <= 80; k += 17) {
1652 for (uint32_t m = 1; m <= 1; m++) {
1653 GemmMicrokernelTester()
1654 .mr(1)
1655 .nr(8)
1656 .kr(2)
1657 .sr(1)
1658 .m(m)
1659 .n(n)
1660 .k(k)
1661 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001662 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001663 }
1664 }
1665 }
1666 }
1667
1668 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_div_8) {
1669 TEST_REQUIRES_ARM_NEON_V8;
1670 for (uint32_t n = 16; n <= 24; n += 8) {
1671 for (size_t k = 1; k <= 80; k += 17) {
1672 GemmMicrokernelTester()
1673 .mr(1)
1674 .nr(8)
1675 .kr(2)
1676 .sr(1)
1677 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001678 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -08001679 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001680 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001681 }
1682 }
1683 }
1684
1685 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_div_8_strided_cn) {
1686 TEST_REQUIRES_ARM_NEON_V8;
1687 for (uint32_t n = 16; n <= 24; n += 8) {
1688 for (size_t k = 1; k <= 80; k += 17) {
1689 GemmMicrokernelTester()
1690 .mr(1)
1691 .nr(8)
1692 .kr(2)
1693 .sr(1)
1694 .m(1)
1695 .n(n)
1696 .k(k)
1697 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001698 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001699 }
1700 }
1701 }
1702
1703 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_div_8_subtile) {
1704 TEST_REQUIRES_ARM_NEON_V8;
1705 for (uint32_t n = 16; n <= 24; n += 8) {
1706 for (size_t k = 1; k <= 80; k += 17) {
1707 for (uint32_t m = 1; m <= 1; m++) {
1708 GemmMicrokernelTester()
1709 .mr(1)
1710 .nr(8)
1711 .kr(2)
1712 .sr(1)
1713 .m(m)
1714 .n(n)
1715 .k(k)
1716 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001717 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001718 }
1719 }
1720 }
1721 }
1722
1723 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, small_kernel) {
1724 TEST_REQUIRES_ARM_NEON_V8;
1725 for (size_t k = 1; k <= 80; k += 17) {
1726 GemmMicrokernelTester()
1727 .mr(1)
1728 .nr(8)
1729 .kr(2)
1730 .sr(1)
1731 .m(1)
1732 .n(8)
1733 .k(k)
1734 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001735 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001736 }
1737 }
1738
1739 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, small_kernel_subtile) {
1740 TEST_REQUIRES_ARM_NEON_V8;
1741 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001742 for (uint32_t n = 1; n <= 8; n++) {
1743 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -08001744 GemmMicrokernelTester()
1745 .mr(1)
1746 .nr(8)
1747 .kr(2)
1748 .sr(1)
1749 .m(m)
1750 .n(n)
1751 .k(k)
1752 .ks(3)
1753 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001754 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001755 }
1756 }
1757 }
1758 }
1759
1760 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_gt_8_small_kernel) {
1761 TEST_REQUIRES_ARM_NEON_V8;
1762 for (uint32_t n = 9; n < 16; n++) {
1763 for (size_t k = 1; k <= 80; k += 17) {
1764 GemmMicrokernelTester()
1765 .mr(1)
1766 .nr(8)
1767 .kr(2)
1768 .sr(1)
1769 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001770 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -08001771 .k(k)
1772 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001773 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001774 }
1775 }
1776 }
1777
1778 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_div_8_small_kernel) {
1779 TEST_REQUIRES_ARM_NEON_V8;
1780 for (uint32_t n = 16; n <= 24; n += 8) {
1781 for (size_t k = 1; k <= 80; k += 17) {
1782 GemmMicrokernelTester()
1783 .mr(1)
1784 .nr(8)
1785 .kr(2)
1786 .sr(1)
1787 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001788 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -08001789 .k(k)
1790 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001791 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001792 }
1793 }
1794 }
1795
1796 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, strided_cm_subtile) {
1797 TEST_REQUIRES_ARM_NEON_V8;
1798 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001799 for (uint32_t n = 1; n <= 8; n++) {
1800 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -08001801 GemmMicrokernelTester()
1802 .mr(1)
1803 .nr(8)
1804 .kr(2)
1805 .sr(1)
1806 .m(m)
1807 .n(n)
1808 .k(k)
1809 .cm_stride(11)
1810 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001811 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001812 }
1813 }
1814 }
1815 }
1816
1817 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, a_offset) {
1818 TEST_REQUIRES_ARM_NEON_V8;
1819 for (size_t k = 1; k <= 80; k += 17) {
1820 GemmMicrokernelTester()
1821 .mr(1)
1822 .nr(8)
1823 .kr(2)
1824 .sr(1)
1825 .m(1)
1826 .n(8)
1827 .k(k)
1828 .ks(3)
1829 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08001830 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001831 }
1832 }
1833
1834 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, zero) {
1835 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001836 for (size_t k = 1; k <= 80; k += 17) {
1837 for (uint32_t mz = 0; mz < 1; mz++) {
Frank Barchard15eec022021-11-17 13:26:20 -08001838 GemmMicrokernelTester()
1839 .mr(1)
1840 .nr(8)
1841 .kr(2)
1842 .sr(1)
1843 .m(1)
1844 .n(8)
1845 .k(k)
1846 .ks(3)
1847 .a_offset(83)
1848 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08001849 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001850 }
1851 }
1852 }
1853
1854 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, qmin) {
1855 TEST_REQUIRES_ARM_NEON_V8;
1856 GemmMicrokernelTester()
1857 .mr(1)
1858 .nr(8)
1859 .kr(2)
1860 .sr(1)
1861 .m(1)
1862 .n(8)
1863 .k(16)
1864 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001865 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001866 }
1867
1868 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, qmax) {
1869 TEST_REQUIRES_ARM_NEON_V8;
1870 GemmMicrokernelTester()
1871 .mr(1)
1872 .nr(8)
1873 .kr(2)
1874 .sr(1)
1875 .m(1)
1876 .n(8)
1877 .k(16)
1878 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001879 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001880 }
1881
1882 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, strided_cm) {
1883 TEST_REQUIRES_ARM_NEON_V8;
1884 GemmMicrokernelTester()
1885 .mr(1)
1886 .nr(8)
1887 .kr(2)
1888 .sr(1)
1889 .m(1)
1890 .n(8)
1891 .k(16)
1892 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001893 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001894 }
1895#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1896
1897
1898#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard42f5c502021-11-16 10:04:21 -08001899 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16) {
1900 TEST_REQUIRES_ARM_NEON;
1901 GemmMicrokernelTester()
1902 .mr(1)
1903 .nr(8)
1904 .kr(2)
1905 .sr(1)
1906 .m(1)
1907 .n(8)
1908 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08001909 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08001910 }
1911
1912 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, strided_cn) {
1913 TEST_REQUIRES_ARM_NEON;
1914 GemmMicrokernelTester()
1915 .mr(1)
1916 .nr(8)
1917 .kr(2)
1918 .sr(1)
1919 .m(1)
1920 .n(8)
1921 .k(16)
1922 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001923 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08001924 }
1925
1926 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
1927 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001928 for (uint32_t n = 1; n <= 8; n++) {
1929 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08001930 GemmMicrokernelTester()
1931 .mr(1)
1932 .nr(8)
1933 .kr(2)
1934 .sr(1)
1935 .m(m)
1936 .n(n)
1937 .k(16)
1938 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001939 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08001940 }
1941 }
1942 }
1943
1944 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
1945 TEST_REQUIRES_ARM_NEON;
1946 for (uint32_t m = 1; m <= 1; m++) {
1947 GemmMicrokernelTester()
1948 .mr(1)
1949 .nr(8)
1950 .kr(2)
1951 .sr(1)
1952 .m(m)
1953 .n(8)
1954 .k(16)
1955 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001956 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08001957 }
1958 }
1959
1960 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
1961 TEST_REQUIRES_ARM_NEON;
1962 for (uint32_t n = 1; n <= 8; n++) {
1963 GemmMicrokernelTester()
1964 .mr(1)
1965 .nr(8)
1966 .kr(2)
1967 .sr(1)
1968 .m(1)
1969 .n(n)
1970 .k(16)
1971 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001972 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08001973 }
1974 }
1975
1976 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_lt_16) {
1977 TEST_REQUIRES_ARM_NEON;
1978 for (size_t k = 1; k < 16; k++) {
1979 GemmMicrokernelTester()
1980 .mr(1)
1981 .nr(8)
1982 .kr(2)
1983 .sr(1)
1984 .m(1)
1985 .n(8)
1986 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001987 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08001988 }
1989 }
1990
1991 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
1992 TEST_REQUIRES_ARM_NEON;
1993 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001994 for (uint32_t n = 1; n <= 8; n++) {
1995 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08001996 GemmMicrokernelTester()
1997 .mr(1)
1998 .nr(8)
1999 .kr(2)
2000 .sr(1)
2001 .m(m)
2002 .n(n)
2003 .k(k)
2004 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002005 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002006 }
2007 }
2008 }
2009 }
2010
2011 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_gt_16) {
2012 TEST_REQUIRES_ARM_NEON;
2013 for (size_t k = 17; k < 32; k++) {
2014 GemmMicrokernelTester()
2015 .mr(1)
2016 .nr(8)
2017 .kr(2)
2018 .sr(1)
2019 .m(1)
2020 .n(8)
2021 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002022 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002023 }
2024 }
2025
2026 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
2027 TEST_REQUIRES_ARM_NEON;
2028 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002029 for (uint32_t n = 1; n <= 8; n++) {
2030 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08002031 GemmMicrokernelTester()
2032 .mr(1)
2033 .nr(8)
2034 .kr(2)
2035 .sr(1)
2036 .m(m)
2037 .n(n)
2038 .k(k)
2039 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002040 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002041 }
2042 }
2043 }
2044 }
2045
2046 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_div_16) {
2047 TEST_REQUIRES_ARM_NEON;
2048 for (size_t k = 32; k <= 160; k += 16) {
2049 GemmMicrokernelTester()
2050 .mr(1)
2051 .nr(8)
2052 .kr(2)
2053 .sr(1)
2054 .m(1)
2055 .n(8)
2056 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002057 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002058 }
2059 }
2060
2061 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_div_16_subtile) {
2062 TEST_REQUIRES_ARM_NEON;
2063 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002064 for (uint32_t n = 1; n <= 8; n++) {
2065 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08002066 GemmMicrokernelTester()
2067 .mr(1)
2068 .nr(8)
2069 .kr(2)
2070 .sr(1)
2071 .m(m)
2072 .n(n)
2073 .k(k)
2074 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002075 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002076 }
2077 }
2078 }
2079 }
2080
2081 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8) {
2082 TEST_REQUIRES_ARM_NEON;
2083 for (uint32_t n = 9; n < 16; n++) {
2084 for (size_t k = 1; k <= 80; k += 17) {
2085 GemmMicrokernelTester()
2086 .mr(1)
2087 .nr(8)
2088 .kr(2)
2089 .sr(1)
2090 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002091 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08002092 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002093 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002094 }
2095 }
2096 }
2097
2098 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8_strided_cn) {
2099 TEST_REQUIRES_ARM_NEON;
2100 for (uint32_t n = 9; n < 16; n++) {
2101 for (size_t k = 1; k <= 80; k += 17) {
2102 GemmMicrokernelTester()
2103 .mr(1)
2104 .nr(8)
2105 .kr(2)
2106 .sr(1)
2107 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002108 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08002109 .k(k)
2110 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002111 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002112 }
2113 }
2114 }
2115
2116 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8_subtile) {
2117 TEST_REQUIRES_ARM_NEON;
2118 for (uint32_t n = 9; n < 16; n++) {
2119 for (size_t k = 1; k <= 80; k += 17) {
2120 for (uint32_t m = 1; m <= 1; m++) {
2121 GemmMicrokernelTester()
2122 .mr(1)
2123 .nr(8)
2124 .kr(2)
2125 .sr(1)
2126 .m(m)
2127 .n(n)
2128 .k(k)
2129 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002130 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002131 }
2132 }
2133 }
2134 }
2135
2136 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8) {
2137 TEST_REQUIRES_ARM_NEON;
2138 for (uint32_t n = 16; n <= 24; n += 8) {
2139 for (size_t k = 1; k <= 80; k += 17) {
2140 GemmMicrokernelTester()
2141 .mr(1)
2142 .nr(8)
2143 .kr(2)
2144 .sr(1)
2145 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002146 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08002147 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002148 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002149 }
2150 }
2151 }
2152
2153 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8_strided_cn) {
2154 TEST_REQUIRES_ARM_NEON;
2155 for (uint32_t n = 16; n <= 24; n += 8) {
2156 for (size_t k = 1; k <= 80; k += 17) {
2157 GemmMicrokernelTester()
2158 .mr(1)
2159 .nr(8)
2160 .kr(2)
2161 .sr(1)
2162 .m(1)
2163 .n(n)
2164 .k(k)
2165 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002166 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002167 }
2168 }
2169 }
2170
2171 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8_subtile) {
2172 TEST_REQUIRES_ARM_NEON;
2173 for (uint32_t n = 16; n <= 24; n += 8) {
2174 for (size_t k = 1; k <= 80; k += 17) {
2175 for (uint32_t m = 1; m <= 1; m++) {
2176 GemmMicrokernelTester()
2177 .mr(1)
2178 .nr(8)
2179 .kr(2)
2180 .sr(1)
2181 .m(m)
2182 .n(n)
2183 .k(k)
2184 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002185 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002186 }
2187 }
2188 }
2189 }
2190
2191 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, small_kernel) {
2192 TEST_REQUIRES_ARM_NEON;
2193 for (size_t k = 1; k <= 80; k += 17) {
2194 GemmMicrokernelTester()
2195 .mr(1)
2196 .nr(8)
2197 .kr(2)
2198 .sr(1)
2199 .m(1)
2200 .n(8)
2201 .k(k)
2202 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002203 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002204 }
2205 }
2206
2207 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, small_kernel_subtile) {
2208 TEST_REQUIRES_ARM_NEON;
2209 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002210 for (uint32_t n = 1; n <= 8; n++) {
2211 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08002212 GemmMicrokernelTester()
2213 .mr(1)
2214 .nr(8)
2215 .kr(2)
2216 .sr(1)
2217 .m(m)
2218 .n(n)
2219 .k(k)
2220 .ks(3)
2221 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002222 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002223 }
2224 }
2225 }
2226 }
2227
2228 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8_small_kernel) {
2229 TEST_REQUIRES_ARM_NEON;
2230 for (uint32_t n = 9; n < 16; n++) {
2231 for (size_t k = 1; k <= 80; k += 17) {
2232 GemmMicrokernelTester()
2233 .mr(1)
2234 .nr(8)
2235 .kr(2)
2236 .sr(1)
2237 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002238 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08002239 .k(k)
2240 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002241 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002242 }
2243 }
2244 }
2245
2246 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8_small_kernel) {
2247 TEST_REQUIRES_ARM_NEON;
2248 for (uint32_t n = 16; n <= 24; n += 8) {
2249 for (size_t k = 1; k <= 80; k += 17) {
2250 GemmMicrokernelTester()
2251 .mr(1)
2252 .nr(8)
2253 .kr(2)
2254 .sr(1)
2255 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002256 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08002257 .k(k)
2258 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002259 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002260 }
2261 }
2262 }
2263
2264 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, strided_cm_subtile) {
2265 TEST_REQUIRES_ARM_NEON;
2266 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002267 for (uint32_t n = 1; n <= 8; n++) {
2268 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08002269 GemmMicrokernelTester()
2270 .mr(1)
2271 .nr(8)
2272 .kr(2)
2273 .sr(1)
2274 .m(m)
2275 .n(n)
2276 .k(k)
2277 .cm_stride(11)
2278 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002279 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002280 }
2281 }
2282 }
2283 }
2284
2285 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, a_offset) {
2286 TEST_REQUIRES_ARM_NEON;
2287 for (size_t k = 1; k <= 80; k += 17) {
2288 GemmMicrokernelTester()
2289 .mr(1)
2290 .nr(8)
2291 .kr(2)
2292 .sr(1)
2293 .m(1)
2294 .n(8)
2295 .k(k)
2296 .ks(3)
2297 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08002298 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002299 }
2300 }
2301
2302 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, zero) {
2303 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002304 for (size_t k = 1; k <= 80; k += 17) {
2305 for (uint32_t mz = 0; mz < 1; mz++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08002306 GemmMicrokernelTester()
2307 .mr(1)
2308 .nr(8)
2309 .kr(2)
2310 .sr(1)
2311 .m(1)
2312 .n(8)
2313 .k(k)
2314 .ks(3)
2315 .a_offset(83)
2316 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08002317 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002318 }
2319 }
2320 }
2321
2322 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, qmin) {
2323 TEST_REQUIRES_ARM_NEON;
2324 GemmMicrokernelTester()
2325 .mr(1)
2326 .nr(8)
2327 .kr(2)
2328 .sr(1)
2329 .m(1)
2330 .n(8)
2331 .k(16)
2332 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002333 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002334 }
2335
2336 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, qmax) {
2337 TEST_REQUIRES_ARM_NEON;
2338 GemmMicrokernelTester()
2339 .mr(1)
2340 .nr(8)
2341 .kr(2)
2342 .sr(1)
2343 .m(1)
2344 .n(8)
2345 .k(16)
2346 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002347 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002348 }
2349
2350 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, strided_cm) {
2351 TEST_REQUIRES_ARM_NEON;
2352 GemmMicrokernelTester()
2353 .mr(1)
2354 .nr(8)
2355 .kr(2)
2356 .sr(1)
2357 .m(1)
2358 .n(8)
2359 .k(16)
2360 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002361 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002362 }
2363#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2364
2365
2366#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard42f5c502021-11-16 10:04:21 -08002367 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_eq_16) {
2368 TEST_REQUIRES_ARM_NEON_V8;
2369 GemmMicrokernelTester()
2370 .mr(1)
2371 .nr(8)
2372 .kr(2)
2373 .sr(1)
2374 .m(1)
2375 .n(8)
2376 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08002377 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002378 }
2379
2380 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, strided_cn) {
2381 TEST_REQUIRES_ARM_NEON_V8;
2382 GemmMicrokernelTester()
2383 .mr(1)
2384 .nr(8)
2385 .kr(2)
2386 .sr(1)
2387 .m(1)
2388 .n(8)
2389 .k(16)
2390 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002391 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002392 }
2393
2394 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_eq_16_subtile) {
2395 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002396 for (uint32_t n = 1; n <= 8; n++) {
2397 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08002398 GemmMicrokernelTester()
2399 .mr(1)
2400 .nr(8)
2401 .kr(2)
2402 .sr(1)
2403 .m(m)
2404 .n(n)
2405 .k(16)
2406 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002407 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002408 }
2409 }
2410 }
2411
2412 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_eq_16_subtile_m) {
2413 TEST_REQUIRES_ARM_NEON_V8;
2414 for (uint32_t m = 1; m <= 1; m++) {
2415 GemmMicrokernelTester()
2416 .mr(1)
2417 .nr(8)
2418 .kr(2)
2419 .sr(1)
2420 .m(m)
2421 .n(8)
2422 .k(16)
2423 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002424 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002425 }
2426 }
2427
2428 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_eq_16_subtile_n) {
2429 TEST_REQUIRES_ARM_NEON_V8;
2430 for (uint32_t n = 1; n <= 8; n++) {
2431 GemmMicrokernelTester()
2432 .mr(1)
2433 .nr(8)
2434 .kr(2)
2435 .sr(1)
2436 .m(1)
2437 .n(n)
2438 .k(16)
2439 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002440 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002441 }
2442 }
2443
2444 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_lt_16) {
2445 TEST_REQUIRES_ARM_NEON_V8;
2446 for (size_t k = 1; k < 16; k++) {
2447 GemmMicrokernelTester()
2448 .mr(1)
2449 .nr(8)
2450 .kr(2)
2451 .sr(1)
2452 .m(1)
2453 .n(8)
2454 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002455 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002456 }
2457 }
2458
2459 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_lt_16_subtile) {
2460 TEST_REQUIRES_ARM_NEON_V8;
2461 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002462 for (uint32_t n = 1; n <= 8; n++) {
2463 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08002464 GemmMicrokernelTester()
2465 .mr(1)
2466 .nr(8)
2467 .kr(2)
2468 .sr(1)
2469 .m(m)
2470 .n(n)
2471 .k(k)
2472 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002473 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002474 }
2475 }
2476 }
2477 }
2478
2479 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_gt_16) {
2480 TEST_REQUIRES_ARM_NEON_V8;
2481 for (size_t k = 17; k < 32; k++) {
2482 GemmMicrokernelTester()
2483 .mr(1)
2484 .nr(8)
2485 .kr(2)
2486 .sr(1)
2487 .m(1)
2488 .n(8)
2489 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002490 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002491 }
2492 }
2493
2494 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_gt_16_subtile) {
2495 TEST_REQUIRES_ARM_NEON_V8;
2496 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002497 for (uint32_t n = 1; n <= 8; n++) {
2498 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08002499 GemmMicrokernelTester()
2500 .mr(1)
2501 .nr(8)
2502 .kr(2)
2503 .sr(1)
2504 .m(m)
2505 .n(n)
2506 .k(k)
2507 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002508 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002509 }
2510 }
2511 }
2512 }
2513
2514 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_div_16) {
2515 TEST_REQUIRES_ARM_NEON_V8;
2516 for (size_t k = 32; k <= 160; k += 16) {
2517 GemmMicrokernelTester()
2518 .mr(1)
2519 .nr(8)
2520 .kr(2)
2521 .sr(1)
2522 .m(1)
2523 .n(8)
2524 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002525 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002526 }
2527 }
2528
2529 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_div_16_subtile) {
2530 TEST_REQUIRES_ARM_NEON_V8;
2531 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002532 for (uint32_t n = 1; n <= 8; n++) {
2533 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08002534 GemmMicrokernelTester()
2535 .mr(1)
2536 .nr(8)
2537 .kr(2)
2538 .sr(1)
2539 .m(m)
2540 .n(n)
2541 .k(k)
2542 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002543 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002544 }
2545 }
2546 }
2547 }
2548
2549 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_gt_8) {
2550 TEST_REQUIRES_ARM_NEON_V8;
2551 for (uint32_t n = 9; n < 16; n++) {
2552 for (size_t k = 1; k <= 80; k += 17) {
2553 GemmMicrokernelTester()
2554 .mr(1)
2555 .nr(8)
2556 .kr(2)
2557 .sr(1)
2558 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002559 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08002560 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002561 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002562 }
2563 }
2564 }
2565
2566 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_gt_8_strided_cn) {
2567 TEST_REQUIRES_ARM_NEON_V8;
2568 for (uint32_t n = 9; n < 16; n++) {
2569 for (size_t k = 1; k <= 80; k += 17) {
2570 GemmMicrokernelTester()
2571 .mr(1)
2572 .nr(8)
2573 .kr(2)
2574 .sr(1)
2575 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002576 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08002577 .k(k)
2578 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002579 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002580 }
2581 }
2582 }
2583
2584 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_gt_8_subtile) {
2585 TEST_REQUIRES_ARM_NEON_V8;
2586 for (uint32_t n = 9; n < 16; n++) {
2587 for (size_t k = 1; k <= 80; k += 17) {
2588 for (uint32_t m = 1; m <= 1; m++) {
2589 GemmMicrokernelTester()
2590 .mr(1)
2591 .nr(8)
2592 .kr(2)
2593 .sr(1)
2594 .m(m)
2595 .n(n)
2596 .k(k)
2597 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002598 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002599 }
2600 }
2601 }
2602 }
2603
2604 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_div_8) {
2605 TEST_REQUIRES_ARM_NEON_V8;
2606 for (uint32_t n = 16; n <= 24; n += 8) {
2607 for (size_t k = 1; k <= 80; k += 17) {
2608 GemmMicrokernelTester()
2609 .mr(1)
2610 .nr(8)
2611 .kr(2)
2612 .sr(1)
2613 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002614 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08002615 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002616 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002617 }
2618 }
2619 }
2620
2621 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_div_8_strided_cn) {
2622 TEST_REQUIRES_ARM_NEON_V8;
2623 for (uint32_t n = 16; n <= 24; n += 8) {
2624 for (size_t k = 1; k <= 80; k += 17) {
2625 GemmMicrokernelTester()
2626 .mr(1)
2627 .nr(8)
2628 .kr(2)
2629 .sr(1)
2630 .m(1)
2631 .n(n)
2632 .k(k)
2633 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002634 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002635 }
2636 }
2637 }
2638
2639 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_div_8_subtile) {
2640 TEST_REQUIRES_ARM_NEON_V8;
2641 for (uint32_t n = 16; n <= 24; n += 8) {
2642 for (size_t k = 1; k <= 80; k += 17) {
2643 for (uint32_t m = 1; m <= 1; m++) {
2644 GemmMicrokernelTester()
2645 .mr(1)
2646 .nr(8)
2647 .kr(2)
2648 .sr(1)
2649 .m(m)
2650 .n(n)
2651 .k(k)
2652 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002653 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002654 }
2655 }
2656 }
2657 }
2658
2659 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, small_kernel) {
2660 TEST_REQUIRES_ARM_NEON_V8;
2661 for (size_t k = 1; k <= 80; k += 17) {
2662 GemmMicrokernelTester()
2663 .mr(1)
2664 .nr(8)
2665 .kr(2)
2666 .sr(1)
2667 .m(1)
2668 .n(8)
2669 .k(k)
2670 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002671 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002672 }
2673 }
2674
2675 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, small_kernel_subtile) {
2676 TEST_REQUIRES_ARM_NEON_V8;
2677 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002678 for (uint32_t n = 1; n <= 8; n++) {
2679 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08002680 GemmMicrokernelTester()
2681 .mr(1)
2682 .nr(8)
2683 .kr(2)
2684 .sr(1)
2685 .m(m)
2686 .n(n)
2687 .k(k)
2688 .ks(3)
2689 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002690 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002691 }
2692 }
2693 }
2694 }
2695
2696 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_gt_8_small_kernel) {
2697 TEST_REQUIRES_ARM_NEON_V8;
2698 for (uint32_t n = 9; n < 16; n++) {
2699 for (size_t k = 1; k <= 80; k += 17) {
2700 GemmMicrokernelTester()
2701 .mr(1)
2702 .nr(8)
2703 .kr(2)
2704 .sr(1)
2705 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002706 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08002707 .k(k)
2708 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002709 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002710 }
2711 }
2712 }
2713
2714 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_div_8_small_kernel) {
2715 TEST_REQUIRES_ARM_NEON_V8;
2716 for (uint32_t n = 16; n <= 24; n += 8) {
2717 for (size_t k = 1; k <= 80; k += 17) {
2718 GemmMicrokernelTester()
2719 .mr(1)
2720 .nr(8)
2721 .kr(2)
2722 .sr(1)
2723 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002724 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08002725 .k(k)
2726 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002727 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002728 }
2729 }
2730 }
2731
2732 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, strided_cm_subtile) {
2733 TEST_REQUIRES_ARM_NEON_V8;
2734 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002735 for (uint32_t n = 1; n <= 8; n++) {
2736 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08002737 GemmMicrokernelTester()
2738 .mr(1)
2739 .nr(8)
2740 .kr(2)
2741 .sr(1)
2742 .m(m)
2743 .n(n)
2744 .k(k)
2745 .cm_stride(11)
2746 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002747 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002748 }
2749 }
2750 }
2751 }
2752
2753 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, a_offset) {
2754 TEST_REQUIRES_ARM_NEON_V8;
2755 for (size_t k = 1; k <= 80; k += 17) {
2756 GemmMicrokernelTester()
2757 .mr(1)
2758 .nr(8)
2759 .kr(2)
2760 .sr(1)
2761 .m(1)
2762 .n(8)
2763 .k(k)
2764 .ks(3)
2765 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08002766 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002767 }
2768 }
2769
2770 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, zero) {
2771 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002772 for (size_t k = 1; k <= 80; k += 17) {
2773 for (uint32_t mz = 0; mz < 1; mz++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08002774 GemmMicrokernelTester()
2775 .mr(1)
2776 .nr(8)
2777 .kr(2)
2778 .sr(1)
2779 .m(1)
2780 .n(8)
2781 .k(k)
2782 .ks(3)
2783 .a_offset(83)
2784 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08002785 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002786 }
2787 }
2788 }
2789
2790 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, qmin) {
2791 TEST_REQUIRES_ARM_NEON_V8;
2792 GemmMicrokernelTester()
2793 .mr(1)
2794 .nr(8)
2795 .kr(2)
2796 .sr(1)
2797 .m(1)
2798 .n(8)
2799 .k(16)
2800 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002801 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002802 }
2803
2804 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, qmax) {
2805 TEST_REQUIRES_ARM_NEON_V8;
2806 GemmMicrokernelTester()
2807 .mr(1)
2808 .nr(8)
2809 .kr(2)
2810 .sr(1)
2811 .m(1)
2812 .n(8)
2813 .k(16)
2814 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002815 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002816 }
2817
2818 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, strided_cm) {
2819 TEST_REQUIRES_ARM_NEON_V8;
2820 GemmMicrokernelTester()
2821 .mr(1)
2822 .nr(8)
2823 .kr(2)
2824 .sr(1)
2825 .m(1)
2826 .n(8)
2827 .k(16)
2828 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002829 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002830 }
2831#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2832
2833
2834#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharde22685a2021-11-12 11:36:58 -08002835 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_eq_16) {
Frank Barchardeb704f72021-11-12 01:26:50 -08002836 TEST_REQUIRES_ARM_NEON;
2837 GemmMicrokernelTester()
2838 .mr(1)
2839 .nr(8)
2840 .kr(4)
2841 .sr(2)
2842 .m(1)
2843 .n(8)
2844 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08002845 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08002846 }
2847
Frank Barcharde22685a2021-11-12 11:36:58 -08002848 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, strided_cn) {
Frank Barchardeb704f72021-11-12 01:26:50 -08002849 TEST_REQUIRES_ARM_NEON;
2850 GemmMicrokernelTester()
2851 .mr(1)
2852 .nr(8)
2853 .kr(4)
2854 .sr(2)
2855 .m(1)
2856 .n(8)
2857 .k(16)
2858 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002859 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08002860 }
2861
Frank Barcharde22685a2021-11-12 11:36:58 -08002862 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_eq_16_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08002863 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002864 for (uint32_t n = 1; n <= 8; n++) {
2865 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08002866 GemmMicrokernelTester()
2867 .mr(1)
2868 .nr(8)
2869 .kr(4)
2870 .sr(2)
2871 .m(m)
2872 .n(n)
2873 .k(16)
2874 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002875 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08002876 }
2877 }
2878 }
2879
Frank Barcharde22685a2021-11-12 11:36:58 -08002880 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_eq_16_subtile_m) {
Frank Barchardeb704f72021-11-12 01:26:50 -08002881 TEST_REQUIRES_ARM_NEON;
2882 for (uint32_t m = 1; m <= 1; m++) {
2883 GemmMicrokernelTester()
2884 .mr(1)
2885 .nr(8)
2886 .kr(4)
2887 .sr(2)
2888 .m(m)
2889 .n(8)
2890 .k(16)
2891 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002892 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08002893 }
2894 }
2895
Frank Barcharde22685a2021-11-12 11:36:58 -08002896 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_eq_16_subtile_n) {
Frank Barchardeb704f72021-11-12 01:26:50 -08002897 TEST_REQUIRES_ARM_NEON;
2898 for (uint32_t n = 1; n <= 8; n++) {
2899 GemmMicrokernelTester()
2900 .mr(1)
2901 .nr(8)
2902 .kr(4)
2903 .sr(2)
2904 .m(1)
2905 .n(n)
2906 .k(16)
2907 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002908 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08002909 }
2910 }
2911
Frank Barcharde22685a2021-11-12 11:36:58 -08002912 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_lt_16) {
Frank Barchardeb704f72021-11-12 01:26:50 -08002913 TEST_REQUIRES_ARM_NEON;
2914 for (size_t k = 1; k < 16; k++) {
2915 GemmMicrokernelTester()
2916 .mr(1)
2917 .nr(8)
2918 .kr(4)
2919 .sr(2)
2920 .m(1)
2921 .n(8)
2922 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002923 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08002924 }
2925 }
2926
Frank Barcharde22685a2021-11-12 11:36:58 -08002927 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_lt_16_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08002928 TEST_REQUIRES_ARM_NEON;
2929 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002930 for (uint32_t n = 1; n <= 8; n++) {
2931 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08002932 GemmMicrokernelTester()
2933 .mr(1)
2934 .nr(8)
2935 .kr(4)
2936 .sr(2)
2937 .m(m)
2938 .n(n)
2939 .k(k)
2940 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002941 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08002942 }
2943 }
2944 }
2945 }
2946
Frank Barcharde22685a2021-11-12 11:36:58 -08002947 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_gt_16) {
Frank Barchardeb704f72021-11-12 01:26:50 -08002948 TEST_REQUIRES_ARM_NEON;
2949 for (size_t k = 17; k < 32; k++) {
2950 GemmMicrokernelTester()
2951 .mr(1)
2952 .nr(8)
2953 .kr(4)
2954 .sr(2)
2955 .m(1)
2956 .n(8)
2957 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002958 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08002959 }
2960 }
2961
Frank Barcharde22685a2021-11-12 11:36:58 -08002962 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_gt_16_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08002963 TEST_REQUIRES_ARM_NEON;
2964 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002965 for (uint32_t n = 1; n <= 8; n++) {
2966 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08002967 GemmMicrokernelTester()
2968 .mr(1)
2969 .nr(8)
2970 .kr(4)
2971 .sr(2)
2972 .m(m)
2973 .n(n)
2974 .k(k)
2975 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002976 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08002977 }
2978 }
2979 }
2980 }
2981
Frank Barcharde22685a2021-11-12 11:36:58 -08002982 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_div_16) {
Frank Barchardeb704f72021-11-12 01:26:50 -08002983 TEST_REQUIRES_ARM_NEON;
2984 for (size_t k = 32; k <= 160; k += 16) {
2985 GemmMicrokernelTester()
2986 .mr(1)
2987 .nr(8)
2988 .kr(4)
2989 .sr(2)
2990 .m(1)
2991 .n(8)
2992 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002993 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08002994 }
2995 }
2996
Frank Barcharde22685a2021-11-12 11:36:58 -08002997 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_div_16_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08002998 TEST_REQUIRES_ARM_NEON;
2999 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003000 for (uint32_t n = 1; n <= 8; n++) {
3001 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003002 GemmMicrokernelTester()
3003 .mr(1)
3004 .nr(8)
3005 .kr(4)
3006 .sr(2)
3007 .m(m)
3008 .n(n)
3009 .k(k)
3010 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003011 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003012 }
3013 }
3014 }
3015 }
3016
Frank Barcharde22685a2021-11-12 11:36:58 -08003017 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_gt_8) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003018 TEST_REQUIRES_ARM_NEON;
3019 for (uint32_t n = 9; n < 16; n++) {
3020 for (size_t k = 1; k <= 80; k += 17) {
3021 GemmMicrokernelTester()
3022 .mr(1)
3023 .nr(8)
3024 .kr(4)
3025 .sr(2)
3026 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003027 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08003028 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003029 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003030 }
3031 }
3032 }
3033
Frank Barcharde22685a2021-11-12 11:36:58 -08003034 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_gt_8_strided_cn) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003035 TEST_REQUIRES_ARM_NEON;
3036 for (uint32_t n = 9; n < 16; n++) {
3037 for (size_t k = 1; k <= 80; k += 17) {
3038 GemmMicrokernelTester()
3039 .mr(1)
3040 .nr(8)
3041 .kr(4)
3042 .sr(2)
3043 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003044 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08003045 .k(k)
3046 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003047 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003048 }
3049 }
3050 }
3051
Frank Barcharde22685a2021-11-12 11:36:58 -08003052 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_gt_8_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003053 TEST_REQUIRES_ARM_NEON;
3054 for (uint32_t n = 9; n < 16; n++) {
3055 for (size_t k = 1; k <= 80; k += 17) {
3056 for (uint32_t m = 1; m <= 1; m++) {
3057 GemmMicrokernelTester()
3058 .mr(1)
3059 .nr(8)
3060 .kr(4)
3061 .sr(2)
3062 .m(m)
3063 .n(n)
3064 .k(k)
3065 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003066 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003067 }
3068 }
3069 }
3070 }
3071
Frank Barcharde22685a2021-11-12 11:36:58 -08003072 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_div_8) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003073 TEST_REQUIRES_ARM_NEON;
3074 for (uint32_t n = 16; n <= 24; n += 8) {
3075 for (size_t k = 1; k <= 80; k += 17) {
3076 GemmMicrokernelTester()
3077 .mr(1)
3078 .nr(8)
3079 .kr(4)
3080 .sr(2)
3081 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003082 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08003083 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003084 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003085 }
3086 }
3087 }
3088
Frank Barcharde22685a2021-11-12 11:36:58 -08003089 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_div_8_strided_cn) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003090 TEST_REQUIRES_ARM_NEON;
3091 for (uint32_t n = 16; n <= 24; n += 8) {
3092 for (size_t k = 1; k <= 80; k += 17) {
3093 GemmMicrokernelTester()
3094 .mr(1)
3095 .nr(8)
3096 .kr(4)
3097 .sr(2)
3098 .m(1)
3099 .n(n)
3100 .k(k)
3101 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003102 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003103 }
3104 }
3105 }
3106
Frank Barcharde22685a2021-11-12 11:36:58 -08003107 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_div_8_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003108 TEST_REQUIRES_ARM_NEON;
3109 for (uint32_t n = 16; n <= 24; n += 8) {
3110 for (size_t k = 1; k <= 80; k += 17) {
3111 for (uint32_t m = 1; m <= 1; m++) {
3112 GemmMicrokernelTester()
3113 .mr(1)
3114 .nr(8)
3115 .kr(4)
3116 .sr(2)
3117 .m(m)
3118 .n(n)
3119 .k(k)
3120 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003121 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003122 }
3123 }
3124 }
3125 }
3126
Frank Barcharde22685a2021-11-12 11:36:58 -08003127 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, small_kernel) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003128 TEST_REQUIRES_ARM_NEON;
3129 for (size_t k = 1; k <= 80; k += 17) {
3130 GemmMicrokernelTester()
3131 .mr(1)
3132 .nr(8)
3133 .kr(4)
3134 .sr(2)
3135 .m(1)
3136 .n(8)
3137 .k(k)
3138 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003139 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003140 }
3141 }
3142
Frank Barcharde22685a2021-11-12 11:36:58 -08003143 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, small_kernel_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003144 TEST_REQUIRES_ARM_NEON;
3145 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003146 for (uint32_t n = 1; n <= 8; n++) {
3147 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003148 GemmMicrokernelTester()
3149 .mr(1)
3150 .nr(8)
3151 .kr(4)
3152 .sr(2)
3153 .m(m)
3154 .n(n)
3155 .k(k)
3156 .ks(3)
3157 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003158 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003159 }
3160 }
3161 }
3162 }
3163
Frank Barcharde22685a2021-11-12 11:36:58 -08003164 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_gt_8_small_kernel) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003165 TEST_REQUIRES_ARM_NEON;
3166 for (uint32_t n = 9; n < 16; n++) {
3167 for (size_t k = 1; k <= 80; k += 17) {
3168 GemmMicrokernelTester()
3169 .mr(1)
3170 .nr(8)
3171 .kr(4)
3172 .sr(2)
3173 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003174 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08003175 .k(k)
3176 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003177 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003178 }
3179 }
3180 }
3181
Frank Barcharde22685a2021-11-12 11:36:58 -08003182 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_div_8_small_kernel) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003183 TEST_REQUIRES_ARM_NEON;
3184 for (uint32_t n = 16; n <= 24; n += 8) {
3185 for (size_t k = 1; k <= 80; k += 17) {
3186 GemmMicrokernelTester()
3187 .mr(1)
3188 .nr(8)
3189 .kr(4)
3190 .sr(2)
3191 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003192 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08003193 .k(k)
3194 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003195 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003196 }
3197 }
3198 }
3199
Frank Barcharde22685a2021-11-12 11:36:58 -08003200 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, strided_cm_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003201 TEST_REQUIRES_ARM_NEON;
3202 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003203 for (uint32_t n = 1; n <= 8; n++) {
3204 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003205 GemmMicrokernelTester()
3206 .mr(1)
3207 .nr(8)
3208 .kr(4)
3209 .sr(2)
3210 .m(m)
3211 .n(n)
3212 .k(k)
3213 .cm_stride(11)
3214 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003215 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003216 }
3217 }
3218 }
3219 }
3220
Frank Barcharde22685a2021-11-12 11:36:58 -08003221 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, a_offset) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003222 TEST_REQUIRES_ARM_NEON;
3223 for (size_t k = 1; k <= 80; k += 17) {
3224 GemmMicrokernelTester()
3225 .mr(1)
3226 .nr(8)
3227 .kr(4)
3228 .sr(2)
3229 .m(1)
3230 .n(8)
3231 .k(k)
3232 .ks(3)
3233 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08003234 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003235 }
3236 }
3237
Frank Barcharde22685a2021-11-12 11:36:58 -08003238 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, zero) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003239 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003240 for (size_t k = 1; k <= 80; k += 17) {
3241 for (uint32_t mz = 0; mz < 1; mz++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003242 GemmMicrokernelTester()
3243 .mr(1)
3244 .nr(8)
3245 .kr(4)
3246 .sr(2)
3247 .m(1)
3248 .n(8)
3249 .k(k)
3250 .ks(3)
3251 .a_offset(83)
3252 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08003253 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003254 }
3255 }
3256 }
3257
Frank Barcharde22685a2021-11-12 11:36:58 -08003258 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, qmin) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003259 TEST_REQUIRES_ARM_NEON;
3260 GemmMicrokernelTester()
3261 .mr(1)
3262 .nr(8)
3263 .kr(4)
3264 .sr(2)
3265 .m(1)
3266 .n(8)
3267 .k(16)
3268 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003269 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003270 }
3271
Frank Barcharde22685a2021-11-12 11:36:58 -08003272 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, qmax) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003273 TEST_REQUIRES_ARM_NEON;
3274 GemmMicrokernelTester()
3275 .mr(1)
3276 .nr(8)
3277 .kr(4)
3278 .sr(2)
3279 .m(1)
3280 .n(8)
3281 .k(16)
3282 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003283 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003284 }
3285
Frank Barcharde22685a2021-11-12 11:36:58 -08003286 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, strided_cm) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003287 TEST_REQUIRES_ARM_NEON;
3288 GemmMicrokernelTester()
3289 .mr(1)
3290 .nr(8)
3291 .kr(4)
3292 .sr(2)
3293 .m(1)
3294 .n(8)
3295 .k(16)
3296 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003297 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003298 }
3299#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3300
3301
3302#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharde22685a2021-11-12 11:36:58 -08003303 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_eq_16) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003304 TEST_REQUIRES_ARM_NEON;
3305 GemmMicrokernelTester()
3306 .mr(2)
3307 .nr(8)
3308 .kr(4)
3309 .sr(2)
3310 .m(2)
3311 .n(8)
3312 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08003313 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003314 }
3315
Frank Barcharde22685a2021-11-12 11:36:58 -08003316 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, strided_cn) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003317 TEST_REQUIRES_ARM_NEON;
3318 GemmMicrokernelTester()
3319 .mr(2)
3320 .nr(8)
3321 .kr(4)
3322 .sr(2)
3323 .m(2)
3324 .n(8)
3325 .k(16)
3326 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003327 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003328 }
3329
Frank Barcharde22685a2021-11-12 11:36:58 -08003330 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_eq_16_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003331 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003332 for (uint32_t n = 1; n <= 8; n++) {
3333 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003334 GemmMicrokernelTester()
3335 .mr(2)
3336 .nr(8)
3337 .kr(4)
3338 .sr(2)
3339 .m(m)
3340 .n(n)
3341 .k(16)
3342 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003343 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003344 }
3345 }
3346 }
3347
Frank Barcharde22685a2021-11-12 11:36:58 -08003348 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_eq_16_subtile_m) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003349 TEST_REQUIRES_ARM_NEON;
3350 for (uint32_t m = 1; m <= 2; m++) {
3351 GemmMicrokernelTester()
3352 .mr(2)
3353 .nr(8)
3354 .kr(4)
3355 .sr(2)
3356 .m(m)
3357 .n(8)
3358 .k(16)
3359 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003360 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003361 }
3362 }
3363
Frank Barcharde22685a2021-11-12 11:36:58 -08003364 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_eq_16_subtile_n) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003365 TEST_REQUIRES_ARM_NEON;
3366 for (uint32_t n = 1; n <= 8; n++) {
3367 GemmMicrokernelTester()
3368 .mr(2)
3369 .nr(8)
3370 .kr(4)
3371 .sr(2)
3372 .m(2)
3373 .n(n)
3374 .k(16)
3375 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003376 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003377 }
3378 }
3379
Frank Barcharde22685a2021-11-12 11:36:58 -08003380 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_lt_16) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003381 TEST_REQUIRES_ARM_NEON;
3382 for (size_t k = 1; k < 16; k++) {
3383 GemmMicrokernelTester()
3384 .mr(2)
3385 .nr(8)
3386 .kr(4)
3387 .sr(2)
3388 .m(2)
3389 .n(8)
3390 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003391 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003392 }
3393 }
3394
Frank Barcharde22685a2021-11-12 11:36:58 -08003395 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_lt_16_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003396 TEST_REQUIRES_ARM_NEON;
3397 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003398 for (uint32_t n = 1; n <= 8; n++) {
3399 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003400 GemmMicrokernelTester()
3401 .mr(2)
3402 .nr(8)
3403 .kr(4)
3404 .sr(2)
3405 .m(m)
3406 .n(n)
3407 .k(k)
3408 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003409 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003410 }
3411 }
3412 }
3413 }
3414
Frank Barcharde22685a2021-11-12 11:36:58 -08003415 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_gt_16) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003416 TEST_REQUIRES_ARM_NEON;
3417 for (size_t k = 17; k < 32; k++) {
3418 GemmMicrokernelTester()
3419 .mr(2)
3420 .nr(8)
3421 .kr(4)
3422 .sr(2)
3423 .m(2)
3424 .n(8)
3425 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003426 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003427 }
3428 }
3429
Frank Barcharde22685a2021-11-12 11:36:58 -08003430 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_gt_16_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003431 TEST_REQUIRES_ARM_NEON;
3432 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003433 for (uint32_t n = 1; n <= 8; n++) {
3434 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003435 GemmMicrokernelTester()
3436 .mr(2)
3437 .nr(8)
3438 .kr(4)
3439 .sr(2)
3440 .m(m)
3441 .n(n)
3442 .k(k)
3443 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003444 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003445 }
3446 }
3447 }
3448 }
3449
Frank Barcharde22685a2021-11-12 11:36:58 -08003450 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_div_16) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003451 TEST_REQUIRES_ARM_NEON;
3452 for (size_t k = 32; k <= 160; k += 16) {
3453 GemmMicrokernelTester()
3454 .mr(2)
3455 .nr(8)
3456 .kr(4)
3457 .sr(2)
3458 .m(2)
3459 .n(8)
3460 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003461 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003462 }
3463 }
3464
Frank Barcharde22685a2021-11-12 11:36:58 -08003465 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_div_16_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003466 TEST_REQUIRES_ARM_NEON;
3467 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003468 for (uint32_t n = 1; n <= 8; n++) {
3469 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003470 GemmMicrokernelTester()
3471 .mr(2)
3472 .nr(8)
3473 .kr(4)
3474 .sr(2)
3475 .m(m)
3476 .n(n)
3477 .k(k)
3478 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003479 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003480 }
3481 }
3482 }
3483 }
3484
Frank Barcharde22685a2021-11-12 11:36:58 -08003485 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_gt_8) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003486 TEST_REQUIRES_ARM_NEON;
3487 for (uint32_t n = 9; n < 16; n++) {
3488 for (size_t k = 1; k <= 80; k += 17) {
3489 GemmMicrokernelTester()
3490 .mr(2)
3491 .nr(8)
3492 .kr(4)
3493 .sr(2)
3494 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003495 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08003496 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003497 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003498 }
3499 }
3500 }
3501
Frank Barcharde22685a2021-11-12 11:36:58 -08003502 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_gt_8_strided_cn) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003503 TEST_REQUIRES_ARM_NEON;
3504 for (uint32_t n = 9; n < 16; n++) {
3505 for (size_t k = 1; k <= 80; k += 17) {
3506 GemmMicrokernelTester()
3507 .mr(2)
3508 .nr(8)
3509 .kr(4)
3510 .sr(2)
3511 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003512 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08003513 .k(k)
3514 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003515 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003516 }
3517 }
3518 }
3519
Frank Barcharde22685a2021-11-12 11:36:58 -08003520 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_gt_8_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003521 TEST_REQUIRES_ARM_NEON;
3522 for (uint32_t n = 9; n < 16; n++) {
3523 for (size_t k = 1; k <= 80; k += 17) {
3524 for (uint32_t m = 1; m <= 2; m++) {
3525 GemmMicrokernelTester()
3526 .mr(2)
3527 .nr(8)
3528 .kr(4)
3529 .sr(2)
3530 .m(m)
3531 .n(n)
3532 .k(k)
3533 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003534 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003535 }
3536 }
3537 }
3538 }
3539
Frank Barcharde22685a2021-11-12 11:36:58 -08003540 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_div_8) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003541 TEST_REQUIRES_ARM_NEON;
3542 for (uint32_t n = 16; n <= 24; n += 8) {
3543 for (size_t k = 1; k <= 80; k += 17) {
3544 GemmMicrokernelTester()
3545 .mr(2)
3546 .nr(8)
3547 .kr(4)
3548 .sr(2)
3549 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003550 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08003551 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003552 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003553 }
3554 }
3555 }
3556
Frank Barcharde22685a2021-11-12 11:36:58 -08003557 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_div_8_strided_cn) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003558 TEST_REQUIRES_ARM_NEON;
3559 for (uint32_t n = 16; n <= 24; n += 8) {
3560 for (size_t k = 1; k <= 80; k += 17) {
3561 GemmMicrokernelTester()
3562 .mr(2)
3563 .nr(8)
3564 .kr(4)
3565 .sr(2)
3566 .m(2)
3567 .n(n)
3568 .k(k)
3569 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003570 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003571 }
3572 }
3573 }
3574
Frank Barcharde22685a2021-11-12 11:36:58 -08003575 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_div_8_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003576 TEST_REQUIRES_ARM_NEON;
3577 for (uint32_t n = 16; n <= 24; n += 8) {
3578 for (size_t k = 1; k <= 80; k += 17) {
3579 for (uint32_t m = 1; m <= 2; m++) {
3580 GemmMicrokernelTester()
3581 .mr(2)
3582 .nr(8)
3583 .kr(4)
3584 .sr(2)
3585 .m(m)
3586 .n(n)
3587 .k(k)
3588 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003589 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003590 }
3591 }
3592 }
3593 }
3594
Frank Barcharde22685a2021-11-12 11:36:58 -08003595 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, small_kernel) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003596 TEST_REQUIRES_ARM_NEON;
3597 for (size_t k = 1; k <= 80; k += 17) {
3598 GemmMicrokernelTester()
3599 .mr(2)
3600 .nr(8)
3601 .kr(4)
3602 .sr(2)
3603 .m(2)
3604 .n(8)
3605 .k(k)
3606 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003607 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003608 }
3609 }
3610
Frank Barcharde22685a2021-11-12 11:36:58 -08003611 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, small_kernel_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003612 TEST_REQUIRES_ARM_NEON;
3613 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003614 for (uint32_t n = 1; n <= 8; n++) {
3615 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003616 GemmMicrokernelTester()
3617 .mr(2)
3618 .nr(8)
3619 .kr(4)
3620 .sr(2)
3621 .m(m)
3622 .n(n)
3623 .k(k)
3624 .ks(3)
3625 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003626 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003627 }
3628 }
3629 }
3630 }
3631
Frank Barcharde22685a2021-11-12 11:36:58 -08003632 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_gt_8_small_kernel) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003633 TEST_REQUIRES_ARM_NEON;
3634 for (uint32_t n = 9; n < 16; n++) {
3635 for (size_t k = 1; k <= 80; k += 17) {
3636 GemmMicrokernelTester()
3637 .mr(2)
3638 .nr(8)
3639 .kr(4)
3640 .sr(2)
3641 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003642 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08003643 .k(k)
3644 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003645 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003646 }
3647 }
3648 }
3649
Frank Barcharde22685a2021-11-12 11:36:58 -08003650 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_div_8_small_kernel) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003651 TEST_REQUIRES_ARM_NEON;
3652 for (uint32_t n = 16; n <= 24; n += 8) {
3653 for (size_t k = 1; k <= 80; k += 17) {
3654 GemmMicrokernelTester()
3655 .mr(2)
3656 .nr(8)
3657 .kr(4)
3658 .sr(2)
3659 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003660 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08003661 .k(k)
3662 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003663 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003664 }
3665 }
3666 }
3667
Frank Barcharde22685a2021-11-12 11:36:58 -08003668 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, strided_cm_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003669 TEST_REQUIRES_ARM_NEON;
3670 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003671 for (uint32_t n = 1; n <= 8; n++) {
3672 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003673 GemmMicrokernelTester()
3674 .mr(2)
3675 .nr(8)
3676 .kr(4)
3677 .sr(2)
3678 .m(m)
3679 .n(n)
3680 .k(k)
3681 .cm_stride(11)
3682 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003683 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003684 }
3685 }
3686 }
3687 }
3688
Frank Barcharde22685a2021-11-12 11:36:58 -08003689 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, a_offset) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003690 TEST_REQUIRES_ARM_NEON;
3691 for (size_t k = 1; k <= 80; k += 17) {
3692 GemmMicrokernelTester()
3693 .mr(2)
3694 .nr(8)
3695 .kr(4)
3696 .sr(2)
3697 .m(2)
3698 .n(8)
3699 .k(k)
3700 .ks(3)
3701 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08003702 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003703 }
3704 }
3705
Frank Barcharde22685a2021-11-12 11:36:58 -08003706 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, zero) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003707 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003708 for (size_t k = 1; k <= 80; k += 17) {
3709 for (uint32_t mz = 0; mz < 2; mz++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003710 GemmMicrokernelTester()
3711 .mr(2)
3712 .nr(8)
3713 .kr(4)
3714 .sr(2)
3715 .m(2)
3716 .n(8)
3717 .k(k)
3718 .ks(3)
3719 .a_offset(163)
3720 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08003721 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003722 }
3723 }
3724 }
3725
Frank Barcharde22685a2021-11-12 11:36:58 -08003726 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, qmin) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003727 TEST_REQUIRES_ARM_NEON;
3728 GemmMicrokernelTester()
3729 .mr(2)
3730 .nr(8)
3731 .kr(4)
3732 .sr(2)
3733 .m(2)
3734 .n(8)
3735 .k(16)
3736 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003737 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003738 }
3739
Frank Barcharde22685a2021-11-12 11:36:58 -08003740 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, qmax) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003741 TEST_REQUIRES_ARM_NEON;
3742 GemmMicrokernelTester()
3743 .mr(2)
3744 .nr(8)
3745 .kr(4)
3746 .sr(2)
3747 .m(2)
3748 .n(8)
3749 .k(16)
3750 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003751 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003752 }
3753
Frank Barcharde22685a2021-11-12 11:36:58 -08003754 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, strided_cm) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003755 TEST_REQUIRES_ARM_NEON;
3756 GemmMicrokernelTester()
3757 .mr(2)
3758 .nr(8)
3759 .kr(4)
3760 .sr(2)
3761 .m(2)
3762 .n(8)
3763 .k(16)
3764 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003765 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003766 }
3767#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3768
3769
3770#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharde22685a2021-11-12 11:36:58 -08003771 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_eq_16) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003772 TEST_REQUIRES_ARM_NEON_V8;
3773 GemmMicrokernelTester()
3774 .mr(1)
3775 .nr(8)
3776 .kr(4)
3777 .sr(2)
3778 .m(1)
3779 .n(8)
3780 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08003781 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003782 }
3783
Frank Barcharde22685a2021-11-12 11:36:58 -08003784 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, strided_cn) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003785 TEST_REQUIRES_ARM_NEON_V8;
3786 GemmMicrokernelTester()
3787 .mr(1)
3788 .nr(8)
3789 .kr(4)
3790 .sr(2)
3791 .m(1)
3792 .n(8)
3793 .k(16)
3794 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003795 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003796 }
3797
Frank Barcharde22685a2021-11-12 11:36:58 -08003798 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_eq_16_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003799 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003800 for (uint32_t n = 1; n <= 8; n++) {
3801 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003802 GemmMicrokernelTester()
3803 .mr(1)
3804 .nr(8)
3805 .kr(4)
3806 .sr(2)
3807 .m(m)
3808 .n(n)
3809 .k(16)
3810 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003811 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003812 }
3813 }
3814 }
3815
Frank Barcharde22685a2021-11-12 11:36:58 -08003816 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_eq_16_subtile_m) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003817 TEST_REQUIRES_ARM_NEON_V8;
3818 for (uint32_t m = 1; m <= 1; m++) {
3819 GemmMicrokernelTester()
3820 .mr(1)
3821 .nr(8)
3822 .kr(4)
3823 .sr(2)
3824 .m(m)
3825 .n(8)
3826 .k(16)
3827 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003828 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003829 }
3830 }
3831
Frank Barcharde22685a2021-11-12 11:36:58 -08003832 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_eq_16_subtile_n) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003833 TEST_REQUIRES_ARM_NEON_V8;
3834 for (uint32_t n = 1; n <= 8; n++) {
3835 GemmMicrokernelTester()
3836 .mr(1)
3837 .nr(8)
3838 .kr(4)
3839 .sr(2)
3840 .m(1)
3841 .n(n)
3842 .k(16)
3843 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003844 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003845 }
3846 }
3847
Frank Barcharde22685a2021-11-12 11:36:58 -08003848 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_lt_16) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003849 TEST_REQUIRES_ARM_NEON_V8;
3850 for (size_t k = 1; k < 16; k++) {
3851 GemmMicrokernelTester()
3852 .mr(1)
3853 .nr(8)
3854 .kr(4)
3855 .sr(2)
3856 .m(1)
3857 .n(8)
3858 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003859 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003860 }
3861 }
3862
Frank Barcharde22685a2021-11-12 11:36:58 -08003863 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_lt_16_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003864 TEST_REQUIRES_ARM_NEON_V8;
3865 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003866 for (uint32_t n = 1; n <= 8; n++) {
3867 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003868 GemmMicrokernelTester()
3869 .mr(1)
3870 .nr(8)
3871 .kr(4)
3872 .sr(2)
3873 .m(m)
3874 .n(n)
3875 .k(k)
3876 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003877 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003878 }
3879 }
3880 }
3881 }
3882
Frank Barcharde22685a2021-11-12 11:36:58 -08003883 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_gt_16) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003884 TEST_REQUIRES_ARM_NEON_V8;
3885 for (size_t k = 17; k < 32; k++) {
3886 GemmMicrokernelTester()
3887 .mr(1)
3888 .nr(8)
3889 .kr(4)
3890 .sr(2)
3891 .m(1)
3892 .n(8)
3893 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003894 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003895 }
3896 }
3897
Frank Barcharde22685a2021-11-12 11:36:58 -08003898 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_gt_16_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003899 TEST_REQUIRES_ARM_NEON_V8;
3900 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003901 for (uint32_t n = 1; n <= 8; n++) {
3902 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003903 GemmMicrokernelTester()
3904 .mr(1)
3905 .nr(8)
3906 .kr(4)
3907 .sr(2)
3908 .m(m)
3909 .n(n)
3910 .k(k)
3911 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003912 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003913 }
3914 }
3915 }
3916 }
3917
Frank Barcharde22685a2021-11-12 11:36:58 -08003918 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_div_16) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003919 TEST_REQUIRES_ARM_NEON_V8;
3920 for (size_t k = 32; k <= 160; k += 16) {
3921 GemmMicrokernelTester()
3922 .mr(1)
3923 .nr(8)
3924 .kr(4)
3925 .sr(2)
3926 .m(1)
3927 .n(8)
3928 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003929 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003930 }
3931 }
3932
Frank Barcharde22685a2021-11-12 11:36:58 -08003933 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_div_16_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003934 TEST_REQUIRES_ARM_NEON_V8;
3935 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003936 for (uint32_t n = 1; n <= 8; n++) {
3937 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003938 GemmMicrokernelTester()
3939 .mr(1)
3940 .nr(8)
3941 .kr(4)
3942 .sr(2)
3943 .m(m)
3944 .n(n)
3945 .k(k)
3946 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003947 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003948 }
3949 }
3950 }
3951 }
3952
Frank Barcharde22685a2021-11-12 11:36:58 -08003953 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_gt_8) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003954 TEST_REQUIRES_ARM_NEON_V8;
3955 for (uint32_t n = 9; n < 16; n++) {
3956 for (size_t k = 1; k <= 80; k += 17) {
3957 GemmMicrokernelTester()
3958 .mr(1)
3959 .nr(8)
3960 .kr(4)
3961 .sr(2)
3962 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003963 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08003964 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003965 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003966 }
3967 }
3968 }
3969
Frank Barcharde22685a2021-11-12 11:36:58 -08003970 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_gt_8_strided_cn) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003971 TEST_REQUIRES_ARM_NEON_V8;
3972 for (uint32_t n = 9; n < 16; n++) {
3973 for (size_t k = 1; k <= 80; k += 17) {
3974 GemmMicrokernelTester()
3975 .mr(1)
3976 .nr(8)
3977 .kr(4)
3978 .sr(2)
3979 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003980 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08003981 .k(k)
3982 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003983 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08003984 }
3985 }
3986 }
3987
Frank Barcharde22685a2021-11-12 11:36:58 -08003988 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_gt_8_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08003989 TEST_REQUIRES_ARM_NEON_V8;
3990 for (uint32_t n = 9; n < 16; n++) {
3991 for (size_t k = 1; k <= 80; k += 17) {
3992 for (uint32_t m = 1; m <= 1; m++) {
3993 GemmMicrokernelTester()
3994 .mr(1)
3995 .nr(8)
3996 .kr(4)
3997 .sr(2)
3998 .m(m)
3999 .n(n)
4000 .k(k)
4001 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004002 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004003 }
4004 }
4005 }
4006 }
4007
Frank Barcharde22685a2021-11-12 11:36:58 -08004008 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_div_8) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004009 TEST_REQUIRES_ARM_NEON_V8;
4010 for (uint32_t n = 16; n <= 24; n += 8) {
4011 for (size_t k = 1; k <= 80; k += 17) {
4012 GemmMicrokernelTester()
4013 .mr(1)
4014 .nr(8)
4015 .kr(4)
4016 .sr(2)
4017 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004018 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08004019 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004020 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004021 }
4022 }
4023 }
4024
Frank Barcharde22685a2021-11-12 11:36:58 -08004025 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_div_8_strided_cn) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004026 TEST_REQUIRES_ARM_NEON_V8;
4027 for (uint32_t n = 16; n <= 24; n += 8) {
4028 for (size_t k = 1; k <= 80; k += 17) {
4029 GemmMicrokernelTester()
4030 .mr(1)
4031 .nr(8)
4032 .kr(4)
4033 .sr(2)
4034 .m(1)
4035 .n(n)
4036 .k(k)
4037 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004038 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004039 }
4040 }
4041 }
4042
Frank Barcharde22685a2021-11-12 11:36:58 -08004043 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_div_8_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004044 TEST_REQUIRES_ARM_NEON_V8;
4045 for (uint32_t n = 16; n <= 24; n += 8) {
4046 for (size_t k = 1; k <= 80; k += 17) {
4047 for (uint32_t m = 1; m <= 1; m++) {
4048 GemmMicrokernelTester()
4049 .mr(1)
4050 .nr(8)
4051 .kr(4)
4052 .sr(2)
4053 .m(m)
4054 .n(n)
4055 .k(k)
4056 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004057 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004058 }
4059 }
4060 }
4061 }
4062
Frank Barcharde22685a2021-11-12 11:36:58 -08004063 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, small_kernel) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004064 TEST_REQUIRES_ARM_NEON_V8;
4065 for (size_t k = 1; k <= 80; k += 17) {
4066 GemmMicrokernelTester()
4067 .mr(1)
4068 .nr(8)
4069 .kr(4)
4070 .sr(2)
4071 .m(1)
4072 .n(8)
4073 .k(k)
4074 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004075 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004076 }
4077 }
4078
Frank Barcharde22685a2021-11-12 11:36:58 -08004079 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, small_kernel_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004080 TEST_REQUIRES_ARM_NEON_V8;
4081 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004082 for (uint32_t n = 1; n <= 8; n++) {
4083 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004084 GemmMicrokernelTester()
4085 .mr(1)
4086 .nr(8)
4087 .kr(4)
4088 .sr(2)
4089 .m(m)
4090 .n(n)
4091 .k(k)
4092 .ks(3)
4093 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004094 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004095 }
4096 }
4097 }
4098 }
4099
Frank Barcharde22685a2021-11-12 11:36:58 -08004100 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_gt_8_small_kernel) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004101 TEST_REQUIRES_ARM_NEON_V8;
4102 for (uint32_t n = 9; n < 16; n++) {
4103 for (size_t k = 1; k <= 80; k += 17) {
4104 GemmMicrokernelTester()
4105 .mr(1)
4106 .nr(8)
4107 .kr(4)
4108 .sr(2)
4109 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004110 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08004111 .k(k)
4112 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004113 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004114 }
4115 }
4116 }
4117
Frank Barcharde22685a2021-11-12 11:36:58 -08004118 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_div_8_small_kernel) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004119 TEST_REQUIRES_ARM_NEON_V8;
4120 for (uint32_t n = 16; n <= 24; n += 8) {
4121 for (size_t k = 1; k <= 80; k += 17) {
4122 GemmMicrokernelTester()
4123 .mr(1)
4124 .nr(8)
4125 .kr(4)
4126 .sr(2)
4127 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004128 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08004129 .k(k)
4130 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004131 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004132 }
4133 }
4134 }
4135
Frank Barcharde22685a2021-11-12 11:36:58 -08004136 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, strided_cm_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004137 TEST_REQUIRES_ARM_NEON_V8;
4138 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004139 for (uint32_t n = 1; n <= 8; n++) {
4140 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004141 GemmMicrokernelTester()
4142 .mr(1)
4143 .nr(8)
4144 .kr(4)
4145 .sr(2)
4146 .m(m)
4147 .n(n)
4148 .k(k)
4149 .cm_stride(11)
4150 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004151 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004152 }
4153 }
4154 }
4155 }
4156
Frank Barcharde22685a2021-11-12 11:36:58 -08004157 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, a_offset) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004158 TEST_REQUIRES_ARM_NEON_V8;
4159 for (size_t k = 1; k <= 80; k += 17) {
4160 GemmMicrokernelTester()
4161 .mr(1)
4162 .nr(8)
4163 .kr(4)
4164 .sr(2)
4165 .m(1)
4166 .n(8)
4167 .k(k)
4168 .ks(3)
4169 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08004170 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004171 }
4172 }
4173
Frank Barcharde22685a2021-11-12 11:36:58 -08004174 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, zero) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004175 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004176 for (size_t k = 1; k <= 80; k += 17) {
4177 for (uint32_t mz = 0; mz < 1; mz++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004178 GemmMicrokernelTester()
4179 .mr(1)
4180 .nr(8)
4181 .kr(4)
4182 .sr(2)
4183 .m(1)
4184 .n(8)
4185 .k(k)
4186 .ks(3)
4187 .a_offset(83)
4188 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08004189 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004190 }
4191 }
4192 }
4193
Frank Barcharde22685a2021-11-12 11:36:58 -08004194 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, qmin) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004195 TEST_REQUIRES_ARM_NEON_V8;
4196 GemmMicrokernelTester()
4197 .mr(1)
4198 .nr(8)
4199 .kr(4)
4200 .sr(2)
4201 .m(1)
4202 .n(8)
4203 .k(16)
4204 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004205 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004206 }
4207
Frank Barcharde22685a2021-11-12 11:36:58 -08004208 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, qmax) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004209 TEST_REQUIRES_ARM_NEON_V8;
4210 GemmMicrokernelTester()
4211 .mr(1)
4212 .nr(8)
4213 .kr(4)
4214 .sr(2)
4215 .m(1)
4216 .n(8)
4217 .k(16)
4218 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004219 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004220 }
4221
Frank Barcharde22685a2021-11-12 11:36:58 -08004222 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, strided_cm) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004223 TEST_REQUIRES_ARM_NEON_V8;
4224 GemmMicrokernelTester()
4225 .mr(1)
4226 .nr(8)
4227 .kr(4)
4228 .sr(2)
4229 .m(1)
4230 .n(8)
4231 .k(16)
4232 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004233 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004234 }
4235#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4236
4237
4238#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharde22685a2021-11-12 11:36:58 -08004239 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_eq_16) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004240 TEST_REQUIRES_ARM_NEON_V8;
4241 GemmMicrokernelTester()
4242 .mr(2)
4243 .nr(8)
4244 .kr(4)
4245 .sr(2)
4246 .m(2)
4247 .n(8)
4248 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08004249 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004250 }
4251
Frank Barcharde22685a2021-11-12 11:36:58 -08004252 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, strided_cn) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004253 TEST_REQUIRES_ARM_NEON_V8;
4254 GemmMicrokernelTester()
4255 .mr(2)
4256 .nr(8)
4257 .kr(4)
4258 .sr(2)
4259 .m(2)
4260 .n(8)
4261 .k(16)
4262 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004263 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004264 }
4265
Frank Barcharde22685a2021-11-12 11:36:58 -08004266 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_eq_16_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004267 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004268 for (uint32_t n = 1; n <= 8; n++) {
4269 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004270 GemmMicrokernelTester()
4271 .mr(2)
4272 .nr(8)
4273 .kr(4)
4274 .sr(2)
4275 .m(m)
4276 .n(n)
4277 .k(16)
4278 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004279 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004280 }
4281 }
4282 }
4283
Frank Barcharde22685a2021-11-12 11:36:58 -08004284 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_eq_16_subtile_m) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004285 TEST_REQUIRES_ARM_NEON_V8;
4286 for (uint32_t m = 1; m <= 2; m++) {
4287 GemmMicrokernelTester()
4288 .mr(2)
4289 .nr(8)
4290 .kr(4)
4291 .sr(2)
4292 .m(m)
4293 .n(8)
4294 .k(16)
4295 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004296 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004297 }
4298 }
4299
Frank Barcharde22685a2021-11-12 11:36:58 -08004300 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_eq_16_subtile_n) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004301 TEST_REQUIRES_ARM_NEON_V8;
4302 for (uint32_t n = 1; n <= 8; n++) {
4303 GemmMicrokernelTester()
4304 .mr(2)
4305 .nr(8)
4306 .kr(4)
4307 .sr(2)
4308 .m(2)
4309 .n(n)
4310 .k(16)
4311 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004312 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004313 }
4314 }
4315
Frank Barcharde22685a2021-11-12 11:36:58 -08004316 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_lt_16) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004317 TEST_REQUIRES_ARM_NEON_V8;
4318 for (size_t k = 1; k < 16; k++) {
4319 GemmMicrokernelTester()
4320 .mr(2)
4321 .nr(8)
4322 .kr(4)
4323 .sr(2)
4324 .m(2)
4325 .n(8)
4326 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004327 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004328 }
4329 }
4330
Frank Barcharde22685a2021-11-12 11:36:58 -08004331 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_lt_16_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004332 TEST_REQUIRES_ARM_NEON_V8;
4333 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004334 for (uint32_t n = 1; n <= 8; n++) {
4335 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004336 GemmMicrokernelTester()
4337 .mr(2)
4338 .nr(8)
4339 .kr(4)
4340 .sr(2)
4341 .m(m)
4342 .n(n)
4343 .k(k)
4344 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004345 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004346 }
4347 }
4348 }
4349 }
4350
Frank Barcharde22685a2021-11-12 11:36:58 -08004351 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_gt_16) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004352 TEST_REQUIRES_ARM_NEON_V8;
4353 for (size_t k = 17; k < 32; k++) {
4354 GemmMicrokernelTester()
4355 .mr(2)
4356 .nr(8)
4357 .kr(4)
4358 .sr(2)
4359 .m(2)
4360 .n(8)
4361 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004362 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004363 }
4364 }
4365
Frank Barcharde22685a2021-11-12 11:36:58 -08004366 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_gt_16_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004367 TEST_REQUIRES_ARM_NEON_V8;
4368 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004369 for (uint32_t n = 1; n <= 8; n++) {
4370 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004371 GemmMicrokernelTester()
4372 .mr(2)
4373 .nr(8)
4374 .kr(4)
4375 .sr(2)
4376 .m(m)
4377 .n(n)
4378 .k(k)
4379 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004380 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004381 }
4382 }
4383 }
4384 }
4385
Frank Barcharde22685a2021-11-12 11:36:58 -08004386 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_div_16) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004387 TEST_REQUIRES_ARM_NEON_V8;
4388 for (size_t k = 32; k <= 160; k += 16) {
4389 GemmMicrokernelTester()
4390 .mr(2)
4391 .nr(8)
4392 .kr(4)
4393 .sr(2)
4394 .m(2)
4395 .n(8)
4396 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004397 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004398 }
4399 }
4400
Frank Barcharde22685a2021-11-12 11:36:58 -08004401 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_div_16_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004402 TEST_REQUIRES_ARM_NEON_V8;
4403 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004404 for (uint32_t n = 1; n <= 8; n++) {
4405 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004406 GemmMicrokernelTester()
4407 .mr(2)
4408 .nr(8)
4409 .kr(4)
4410 .sr(2)
4411 .m(m)
4412 .n(n)
4413 .k(k)
4414 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004415 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004416 }
4417 }
4418 }
4419 }
4420
Frank Barcharde22685a2021-11-12 11:36:58 -08004421 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_gt_8) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004422 TEST_REQUIRES_ARM_NEON_V8;
4423 for (uint32_t n = 9; n < 16; n++) {
4424 for (size_t k = 1; k <= 80; k += 17) {
4425 GemmMicrokernelTester()
4426 .mr(2)
4427 .nr(8)
4428 .kr(4)
4429 .sr(2)
4430 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004431 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08004432 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004433 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004434 }
4435 }
4436 }
4437
Frank Barcharde22685a2021-11-12 11:36:58 -08004438 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_gt_8_strided_cn) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004439 TEST_REQUIRES_ARM_NEON_V8;
4440 for (uint32_t n = 9; n < 16; n++) {
4441 for (size_t k = 1; k <= 80; k += 17) {
4442 GemmMicrokernelTester()
4443 .mr(2)
4444 .nr(8)
4445 .kr(4)
4446 .sr(2)
4447 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004448 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08004449 .k(k)
4450 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004451 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004452 }
4453 }
4454 }
4455
Frank Barcharde22685a2021-11-12 11:36:58 -08004456 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_gt_8_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004457 TEST_REQUIRES_ARM_NEON_V8;
4458 for (uint32_t n = 9; n < 16; n++) {
4459 for (size_t k = 1; k <= 80; k += 17) {
4460 for (uint32_t m = 1; m <= 2; m++) {
4461 GemmMicrokernelTester()
4462 .mr(2)
4463 .nr(8)
4464 .kr(4)
4465 .sr(2)
4466 .m(m)
4467 .n(n)
4468 .k(k)
4469 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004470 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004471 }
4472 }
4473 }
4474 }
4475
Frank Barcharde22685a2021-11-12 11:36:58 -08004476 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_div_8) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004477 TEST_REQUIRES_ARM_NEON_V8;
4478 for (uint32_t n = 16; n <= 24; n += 8) {
4479 for (size_t k = 1; k <= 80; k += 17) {
4480 GemmMicrokernelTester()
4481 .mr(2)
4482 .nr(8)
4483 .kr(4)
4484 .sr(2)
4485 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004486 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08004487 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004488 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004489 }
4490 }
4491 }
4492
Frank Barcharde22685a2021-11-12 11:36:58 -08004493 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_div_8_strided_cn) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004494 TEST_REQUIRES_ARM_NEON_V8;
4495 for (uint32_t n = 16; n <= 24; n += 8) {
4496 for (size_t k = 1; k <= 80; k += 17) {
4497 GemmMicrokernelTester()
4498 .mr(2)
4499 .nr(8)
4500 .kr(4)
4501 .sr(2)
4502 .m(2)
4503 .n(n)
4504 .k(k)
4505 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004506 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004507 }
4508 }
4509 }
4510
Frank Barcharde22685a2021-11-12 11:36:58 -08004511 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_div_8_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004512 TEST_REQUIRES_ARM_NEON_V8;
4513 for (uint32_t n = 16; n <= 24; n += 8) {
4514 for (size_t k = 1; k <= 80; k += 17) {
4515 for (uint32_t m = 1; m <= 2; m++) {
4516 GemmMicrokernelTester()
4517 .mr(2)
4518 .nr(8)
4519 .kr(4)
4520 .sr(2)
4521 .m(m)
4522 .n(n)
4523 .k(k)
4524 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004525 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004526 }
4527 }
4528 }
4529 }
4530
Frank Barcharde22685a2021-11-12 11:36:58 -08004531 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, small_kernel) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004532 TEST_REQUIRES_ARM_NEON_V8;
4533 for (size_t k = 1; k <= 80; k += 17) {
4534 GemmMicrokernelTester()
4535 .mr(2)
4536 .nr(8)
4537 .kr(4)
4538 .sr(2)
4539 .m(2)
4540 .n(8)
4541 .k(k)
4542 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004543 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004544 }
4545 }
4546
Frank Barcharde22685a2021-11-12 11:36:58 -08004547 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, small_kernel_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004548 TEST_REQUIRES_ARM_NEON_V8;
4549 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004550 for (uint32_t n = 1; n <= 8; n++) {
4551 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004552 GemmMicrokernelTester()
4553 .mr(2)
4554 .nr(8)
4555 .kr(4)
4556 .sr(2)
4557 .m(m)
4558 .n(n)
4559 .k(k)
4560 .ks(3)
4561 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004562 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004563 }
4564 }
4565 }
4566 }
4567
Frank Barcharde22685a2021-11-12 11:36:58 -08004568 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_gt_8_small_kernel) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004569 TEST_REQUIRES_ARM_NEON_V8;
4570 for (uint32_t n = 9; n < 16; n++) {
4571 for (size_t k = 1; k <= 80; k += 17) {
4572 GemmMicrokernelTester()
4573 .mr(2)
4574 .nr(8)
4575 .kr(4)
4576 .sr(2)
4577 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004578 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08004579 .k(k)
4580 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004581 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004582 }
4583 }
4584 }
4585
Frank Barcharde22685a2021-11-12 11:36:58 -08004586 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_div_8_small_kernel) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004587 TEST_REQUIRES_ARM_NEON_V8;
4588 for (uint32_t n = 16; n <= 24; n += 8) {
4589 for (size_t k = 1; k <= 80; k += 17) {
4590 GemmMicrokernelTester()
4591 .mr(2)
4592 .nr(8)
4593 .kr(4)
4594 .sr(2)
4595 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004596 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08004597 .k(k)
4598 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004599 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004600 }
4601 }
4602 }
4603
Frank Barcharde22685a2021-11-12 11:36:58 -08004604 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, strided_cm_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004605 TEST_REQUIRES_ARM_NEON_V8;
4606 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004607 for (uint32_t n = 1; n <= 8; n++) {
4608 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004609 GemmMicrokernelTester()
4610 .mr(2)
4611 .nr(8)
4612 .kr(4)
4613 .sr(2)
4614 .m(m)
4615 .n(n)
4616 .k(k)
4617 .cm_stride(11)
4618 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004619 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004620 }
4621 }
4622 }
4623 }
4624
Frank Barcharde22685a2021-11-12 11:36:58 -08004625 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, a_offset) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004626 TEST_REQUIRES_ARM_NEON_V8;
4627 for (size_t k = 1; k <= 80; k += 17) {
4628 GemmMicrokernelTester()
4629 .mr(2)
4630 .nr(8)
4631 .kr(4)
4632 .sr(2)
4633 .m(2)
4634 .n(8)
4635 .k(k)
4636 .ks(3)
4637 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08004638 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004639 }
4640 }
4641
Frank Barcharde22685a2021-11-12 11:36:58 -08004642 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, zero) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004643 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004644 for (size_t k = 1; k <= 80; k += 17) {
4645 for (uint32_t mz = 0; mz < 2; mz++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004646 GemmMicrokernelTester()
4647 .mr(2)
4648 .nr(8)
4649 .kr(4)
4650 .sr(2)
4651 .m(2)
4652 .n(8)
4653 .k(k)
4654 .ks(3)
4655 .a_offset(163)
4656 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08004657 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004658 }
4659 }
4660 }
4661
Frank Barcharde22685a2021-11-12 11:36:58 -08004662 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, qmin) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004663 TEST_REQUIRES_ARM_NEON_V8;
4664 GemmMicrokernelTester()
4665 .mr(2)
4666 .nr(8)
4667 .kr(4)
4668 .sr(2)
4669 .m(2)
4670 .n(8)
4671 .k(16)
4672 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004673 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004674 }
4675
Frank Barcharde22685a2021-11-12 11:36:58 -08004676 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, qmax) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004677 TEST_REQUIRES_ARM_NEON_V8;
4678 GemmMicrokernelTester()
4679 .mr(2)
4680 .nr(8)
4681 .kr(4)
4682 .sr(2)
4683 .m(2)
4684 .n(8)
4685 .k(16)
4686 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004687 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004688 }
4689
Frank Barcharde22685a2021-11-12 11:36:58 -08004690 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, strided_cm) {
Frank Barchardeb704f72021-11-12 01:26:50 -08004691 TEST_REQUIRES_ARM_NEON_V8;
4692 GemmMicrokernelTester()
4693 .mr(2)
4694 .nr(8)
4695 .kr(4)
4696 .sr(2)
4697 .m(2)
4698 .n(8)
4699 .k(16)
4700 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004701 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardeb704f72021-11-12 01:26:50 -08004702 }
4703#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4704
4705
4706#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharde22685a2021-11-12 11:36:58 -08004707 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_eq_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08004708 TEST_REQUIRES_ARM_NEON;
4709 GemmMicrokernelTester()
4710 .mr(1)
4711 .nr(8)
4712 .kr(2)
4713 .sr(4)
4714 .m(1)
4715 .n(8)
4716 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08004717 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08004718 }
4719
Frank Barcharde22685a2021-11-12 11:36:58 -08004720 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, strided_cn) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08004721 TEST_REQUIRES_ARM_NEON;
4722 GemmMicrokernelTester()
4723 .mr(1)
4724 .nr(8)
4725 .kr(2)
4726 .sr(4)
4727 .m(1)
4728 .n(8)
4729 .k(16)
4730 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004731 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08004732 }
4733
Frank Barcharde22685a2021-11-12 11:36:58 -08004734 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_eq_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08004735 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004736 for (uint32_t n = 1; n <= 8; n++) {
4737 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08004738 GemmMicrokernelTester()
4739 .mr(1)
4740 .nr(8)
4741 .kr(2)
4742 .sr(4)
4743 .m(m)
4744 .n(n)
4745 .k(16)
4746 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004747 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08004748 }
4749 }
4750 }
4751
Frank Barcharde22685a2021-11-12 11:36:58 -08004752 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_eq_16_subtile_m) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08004753 TEST_REQUIRES_ARM_NEON;
4754 for (uint32_t m = 1; m <= 1; m++) {
4755 GemmMicrokernelTester()
4756 .mr(1)
4757 .nr(8)
4758 .kr(2)
4759 .sr(4)
4760 .m(m)
4761 .n(8)
4762 .k(16)
4763 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004764 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08004765 }
4766 }
4767
Frank Barcharde22685a2021-11-12 11:36:58 -08004768 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_eq_16_subtile_n) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08004769 TEST_REQUIRES_ARM_NEON;
4770 for (uint32_t n = 1; n <= 8; n++) {
4771 GemmMicrokernelTester()
4772 .mr(1)
4773 .nr(8)
4774 .kr(2)
4775 .sr(4)
4776 .m(1)
4777 .n(n)
4778 .k(16)
4779 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004780 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08004781 }
4782 }
4783
Frank Barcharde22685a2021-11-12 11:36:58 -08004784 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_lt_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08004785 TEST_REQUIRES_ARM_NEON;
4786 for (size_t k = 1; k < 16; k++) {
4787 GemmMicrokernelTester()
4788 .mr(1)
4789 .nr(8)
4790 .kr(2)
4791 .sr(4)
4792 .m(1)
4793 .n(8)
4794 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004795 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08004796 }
4797 }
4798
Frank Barcharde22685a2021-11-12 11:36:58 -08004799 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_lt_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08004800 TEST_REQUIRES_ARM_NEON;
4801 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004802 for (uint32_t n = 1; n <= 8; n++) {
4803 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08004804 GemmMicrokernelTester()
4805 .mr(1)
4806 .nr(8)
4807 .kr(2)
4808 .sr(4)
4809 .m(m)
4810 .n(n)
4811 .k(k)
4812 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004813 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08004814 }
4815 }
4816 }
4817 }
4818
Frank Barcharde22685a2021-11-12 11:36:58 -08004819 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_gt_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08004820 TEST_REQUIRES_ARM_NEON;
4821 for (size_t k = 17; k < 32; k++) {
4822 GemmMicrokernelTester()
4823 .mr(1)
4824 .nr(8)
4825 .kr(2)
4826 .sr(4)
4827 .m(1)
4828 .n(8)
4829 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004830 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08004831 }
4832 }
4833
Frank Barcharde22685a2021-11-12 11:36:58 -08004834 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_gt_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08004835 TEST_REQUIRES_ARM_NEON;
4836 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004837 for (uint32_t n = 1; n <= 8; n++) {
4838 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08004839 GemmMicrokernelTester()
4840 .mr(1)
4841 .nr(8)
4842 .kr(2)
4843 .sr(4)
4844 .m(m)
4845 .n(n)
4846 .k(k)
4847 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004848 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08004849 }
4850 }
4851 }
4852 }
4853
Frank Barcharde22685a2021-11-12 11:36:58 -08004854 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_div_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08004855 TEST_REQUIRES_ARM_NEON;
4856 for (size_t k = 32; k <= 160; k += 16) {
4857 GemmMicrokernelTester()
4858 .mr(1)
4859 .nr(8)
4860 .kr(2)
4861 .sr(4)
4862 .m(1)
4863 .n(8)
4864 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004865 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08004866 }
4867 }
4868
Frank Barcharde22685a2021-11-12 11:36:58 -08004869 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_div_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08004870 TEST_REQUIRES_ARM_NEON;
4871 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004872 for (uint32_t n = 1; n <= 8; n++) {
4873 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08004874 GemmMicrokernelTester()
4875 .mr(1)
4876 .nr(8)
4877 .kr(2)
4878 .sr(4)
4879 .m(m)
4880 .n(n)
4881 .k(k)
4882 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004883 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08004884 }
4885 }
4886 }
4887 }
4888
Frank Barcharde22685a2021-11-12 11:36:58 -08004889 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_gt_8) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08004890 TEST_REQUIRES_ARM_NEON;
4891 for (uint32_t n = 9; n < 16; n++) {
4892 for (size_t k = 1; k <= 80; k += 17) {
4893 GemmMicrokernelTester()
4894 .mr(1)
4895 .nr(8)
4896 .kr(2)
4897 .sr(4)
4898 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004899 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08004900 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004901 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08004902 }
4903 }
4904 }
4905
Frank Barcharde22685a2021-11-12 11:36:58 -08004906 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_gt_8_strided_cn) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08004907 TEST_REQUIRES_ARM_NEON;
4908 for (uint32_t n = 9; n < 16; n++) {
4909 for (size_t k = 1; k <= 80; k += 17) {
4910 GemmMicrokernelTester()
4911 .mr(1)
4912 .nr(8)
4913 .kr(2)
4914 .sr(4)
4915 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004916 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08004917 .k(k)
4918 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004919 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08004920 }
4921 }
4922 }
4923
Frank Barcharde22685a2021-11-12 11:36:58 -08004924 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_gt_8_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08004925 TEST_REQUIRES_ARM_NEON;
4926 for (uint32_t n = 9; n < 16; n++) {
4927 for (size_t k = 1; k <= 80; k += 17) {
4928 for (uint32_t m = 1; m <= 1; m++) {
4929 GemmMicrokernelTester()
4930 .mr(1)
4931 .nr(8)
4932 .kr(2)
4933 .sr(4)
4934 .m(m)
4935 .n(n)
4936 .k(k)
4937 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004938 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08004939 }
4940 }
4941 }
4942 }
4943
Frank Barcharde22685a2021-11-12 11:36:58 -08004944 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_div_8) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08004945 TEST_REQUIRES_ARM_NEON;
4946 for (uint32_t n = 16; n <= 24; n += 8) {
4947 for (size_t k = 1; k <= 80; k += 17) {
4948 GemmMicrokernelTester()
4949 .mr(1)
4950 .nr(8)
4951 .kr(2)
4952 .sr(4)
4953 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004954 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08004955 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004956 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08004957 }
4958 }
4959 }
4960
Frank Barcharde22685a2021-11-12 11:36:58 -08004961 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_div_8_strided_cn) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08004962 TEST_REQUIRES_ARM_NEON;
4963 for (uint32_t n = 16; n <= 24; n += 8) {
4964 for (size_t k = 1; k <= 80; k += 17) {
4965 GemmMicrokernelTester()
4966 .mr(1)
4967 .nr(8)
4968 .kr(2)
4969 .sr(4)
4970 .m(1)
4971 .n(n)
4972 .k(k)
4973 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004974 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08004975 }
4976 }
4977 }
4978
Frank Barcharde22685a2021-11-12 11:36:58 -08004979 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_div_8_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08004980 TEST_REQUIRES_ARM_NEON;
4981 for (uint32_t n = 16; n <= 24; n += 8) {
4982 for (size_t k = 1; k <= 80; k += 17) {
4983 for (uint32_t m = 1; m <= 1; m++) {
4984 GemmMicrokernelTester()
4985 .mr(1)
4986 .nr(8)
4987 .kr(2)
4988 .sr(4)
4989 .m(m)
4990 .n(n)
4991 .k(k)
4992 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004993 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08004994 }
4995 }
4996 }
4997 }
4998
Frank Barcharde22685a2021-11-12 11:36:58 -08004999 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, small_kernel) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005000 TEST_REQUIRES_ARM_NEON;
5001 for (size_t k = 1; k <= 80; k += 17) {
5002 GemmMicrokernelTester()
5003 .mr(1)
5004 .nr(8)
5005 .kr(2)
5006 .sr(4)
5007 .m(1)
5008 .n(8)
5009 .k(k)
5010 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005011 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005012 }
5013 }
5014
Frank Barcharde22685a2021-11-12 11:36:58 -08005015 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, small_kernel_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005016 TEST_REQUIRES_ARM_NEON;
5017 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005018 for (uint32_t n = 1; n <= 8; n++) {
5019 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005020 GemmMicrokernelTester()
5021 .mr(1)
5022 .nr(8)
5023 .kr(2)
5024 .sr(4)
5025 .m(m)
5026 .n(n)
5027 .k(k)
5028 .ks(3)
5029 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005030 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005031 }
5032 }
5033 }
5034 }
5035
Frank Barcharde22685a2021-11-12 11:36:58 -08005036 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_gt_8_small_kernel) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005037 TEST_REQUIRES_ARM_NEON;
5038 for (uint32_t n = 9; n < 16; n++) {
5039 for (size_t k = 1; k <= 80; k += 17) {
5040 GemmMicrokernelTester()
5041 .mr(1)
5042 .nr(8)
5043 .kr(2)
5044 .sr(4)
5045 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005046 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08005047 .k(k)
5048 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005049 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005050 }
5051 }
5052 }
5053
Frank Barcharde22685a2021-11-12 11:36:58 -08005054 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_div_8_small_kernel) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005055 TEST_REQUIRES_ARM_NEON;
5056 for (uint32_t n = 16; n <= 24; n += 8) {
5057 for (size_t k = 1; k <= 80; k += 17) {
5058 GemmMicrokernelTester()
5059 .mr(1)
5060 .nr(8)
5061 .kr(2)
5062 .sr(4)
5063 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005064 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08005065 .k(k)
5066 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005067 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005068 }
5069 }
5070 }
5071
Frank Barcharde22685a2021-11-12 11:36:58 -08005072 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, strided_cm_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005073 TEST_REQUIRES_ARM_NEON;
5074 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005075 for (uint32_t n = 1; n <= 8; n++) {
5076 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005077 GemmMicrokernelTester()
5078 .mr(1)
5079 .nr(8)
5080 .kr(2)
5081 .sr(4)
5082 .m(m)
5083 .n(n)
5084 .k(k)
5085 .cm_stride(11)
5086 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005087 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005088 }
5089 }
5090 }
5091 }
5092
Frank Barcharde22685a2021-11-12 11:36:58 -08005093 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, a_offset) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005094 TEST_REQUIRES_ARM_NEON;
5095 for (size_t k = 1; k <= 80; k += 17) {
5096 GemmMicrokernelTester()
5097 .mr(1)
5098 .nr(8)
5099 .kr(2)
5100 .sr(4)
5101 .m(1)
5102 .n(8)
5103 .k(k)
5104 .ks(3)
5105 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08005106 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005107 }
5108 }
5109
Frank Barcharde22685a2021-11-12 11:36:58 -08005110 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, zero) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005111 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005112 for (size_t k = 1; k <= 80; k += 17) {
5113 for (uint32_t mz = 0; mz < 1; mz++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005114 GemmMicrokernelTester()
5115 .mr(1)
5116 .nr(8)
5117 .kr(2)
5118 .sr(4)
5119 .m(1)
5120 .n(8)
5121 .k(k)
5122 .ks(3)
5123 .a_offset(83)
5124 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08005125 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005126 }
5127 }
5128 }
5129
Frank Barcharde22685a2021-11-12 11:36:58 -08005130 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, qmin) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005131 TEST_REQUIRES_ARM_NEON;
5132 GemmMicrokernelTester()
5133 .mr(1)
5134 .nr(8)
5135 .kr(2)
5136 .sr(4)
5137 .m(1)
5138 .n(8)
5139 .k(16)
5140 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005141 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005142 }
5143
Frank Barcharde22685a2021-11-12 11:36:58 -08005144 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, qmax) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005145 TEST_REQUIRES_ARM_NEON;
5146 GemmMicrokernelTester()
5147 .mr(1)
5148 .nr(8)
5149 .kr(2)
5150 .sr(4)
5151 .m(1)
5152 .n(8)
5153 .k(16)
5154 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005155 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005156 }
5157
Frank Barcharde22685a2021-11-12 11:36:58 -08005158 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, strided_cm) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005159 TEST_REQUIRES_ARM_NEON;
5160 GemmMicrokernelTester()
5161 .mr(1)
5162 .nr(8)
5163 .kr(2)
5164 .sr(4)
5165 .m(1)
5166 .n(8)
5167 .k(16)
5168 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005169 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005170 }
5171#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5172
5173
5174#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharde22685a2021-11-12 11:36:58 -08005175 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_eq_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005176 TEST_REQUIRES_ARM_NEON;
5177 GemmMicrokernelTester()
5178 .mr(2)
5179 .nr(8)
5180 .kr(2)
5181 .sr(4)
5182 .m(2)
5183 .n(8)
5184 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08005185 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005186 }
5187
Frank Barcharde22685a2021-11-12 11:36:58 -08005188 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, strided_cn) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005189 TEST_REQUIRES_ARM_NEON;
5190 GemmMicrokernelTester()
5191 .mr(2)
5192 .nr(8)
5193 .kr(2)
5194 .sr(4)
5195 .m(2)
5196 .n(8)
5197 .k(16)
5198 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005199 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005200 }
5201
Frank Barcharde22685a2021-11-12 11:36:58 -08005202 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_eq_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005203 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005204 for (uint32_t n = 1; n <= 8; n++) {
5205 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005206 GemmMicrokernelTester()
5207 .mr(2)
5208 .nr(8)
5209 .kr(2)
5210 .sr(4)
5211 .m(m)
5212 .n(n)
5213 .k(16)
5214 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005215 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005216 }
5217 }
5218 }
5219
Frank Barcharde22685a2021-11-12 11:36:58 -08005220 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_eq_16_subtile_m) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005221 TEST_REQUIRES_ARM_NEON;
5222 for (uint32_t m = 1; m <= 2; m++) {
5223 GemmMicrokernelTester()
5224 .mr(2)
5225 .nr(8)
5226 .kr(2)
5227 .sr(4)
5228 .m(m)
5229 .n(8)
5230 .k(16)
5231 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005232 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005233 }
5234 }
5235
Frank Barcharde22685a2021-11-12 11:36:58 -08005236 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_eq_16_subtile_n) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005237 TEST_REQUIRES_ARM_NEON;
5238 for (uint32_t n = 1; n <= 8; n++) {
5239 GemmMicrokernelTester()
5240 .mr(2)
5241 .nr(8)
5242 .kr(2)
5243 .sr(4)
5244 .m(2)
5245 .n(n)
5246 .k(16)
5247 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005248 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005249 }
5250 }
5251
Frank Barcharde22685a2021-11-12 11:36:58 -08005252 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_lt_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005253 TEST_REQUIRES_ARM_NEON;
5254 for (size_t k = 1; k < 16; k++) {
5255 GemmMicrokernelTester()
5256 .mr(2)
5257 .nr(8)
5258 .kr(2)
5259 .sr(4)
5260 .m(2)
5261 .n(8)
5262 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005263 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005264 }
5265 }
5266
Frank Barcharde22685a2021-11-12 11:36:58 -08005267 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_lt_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005268 TEST_REQUIRES_ARM_NEON;
5269 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005270 for (uint32_t n = 1; n <= 8; n++) {
5271 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005272 GemmMicrokernelTester()
5273 .mr(2)
5274 .nr(8)
5275 .kr(2)
5276 .sr(4)
5277 .m(m)
5278 .n(n)
5279 .k(k)
5280 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005281 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005282 }
5283 }
5284 }
5285 }
5286
Frank Barcharde22685a2021-11-12 11:36:58 -08005287 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_gt_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005288 TEST_REQUIRES_ARM_NEON;
5289 for (size_t k = 17; k < 32; k++) {
5290 GemmMicrokernelTester()
5291 .mr(2)
5292 .nr(8)
5293 .kr(2)
5294 .sr(4)
5295 .m(2)
5296 .n(8)
5297 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005298 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005299 }
5300 }
5301
Frank Barcharde22685a2021-11-12 11:36:58 -08005302 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_gt_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005303 TEST_REQUIRES_ARM_NEON;
5304 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005305 for (uint32_t n = 1; n <= 8; n++) {
5306 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005307 GemmMicrokernelTester()
5308 .mr(2)
5309 .nr(8)
5310 .kr(2)
5311 .sr(4)
5312 .m(m)
5313 .n(n)
5314 .k(k)
5315 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005316 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005317 }
5318 }
5319 }
5320 }
5321
Frank Barcharde22685a2021-11-12 11:36:58 -08005322 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_div_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005323 TEST_REQUIRES_ARM_NEON;
5324 for (size_t k = 32; k <= 160; k += 16) {
5325 GemmMicrokernelTester()
5326 .mr(2)
5327 .nr(8)
5328 .kr(2)
5329 .sr(4)
5330 .m(2)
5331 .n(8)
5332 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005333 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005334 }
5335 }
5336
Frank Barcharde22685a2021-11-12 11:36:58 -08005337 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_div_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005338 TEST_REQUIRES_ARM_NEON;
5339 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005340 for (uint32_t n = 1; n <= 8; n++) {
5341 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005342 GemmMicrokernelTester()
5343 .mr(2)
5344 .nr(8)
5345 .kr(2)
5346 .sr(4)
5347 .m(m)
5348 .n(n)
5349 .k(k)
5350 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005351 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005352 }
5353 }
5354 }
5355 }
5356
Frank Barcharde22685a2021-11-12 11:36:58 -08005357 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_gt_8) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005358 TEST_REQUIRES_ARM_NEON;
5359 for (uint32_t n = 9; n < 16; n++) {
5360 for (size_t k = 1; k <= 80; k += 17) {
5361 GemmMicrokernelTester()
5362 .mr(2)
5363 .nr(8)
5364 .kr(2)
5365 .sr(4)
5366 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005367 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08005368 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005369 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005370 }
5371 }
5372 }
5373
Frank Barcharde22685a2021-11-12 11:36:58 -08005374 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_gt_8_strided_cn) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005375 TEST_REQUIRES_ARM_NEON;
5376 for (uint32_t n = 9; n < 16; n++) {
5377 for (size_t k = 1; k <= 80; k += 17) {
5378 GemmMicrokernelTester()
5379 .mr(2)
5380 .nr(8)
5381 .kr(2)
5382 .sr(4)
5383 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005384 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08005385 .k(k)
5386 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005387 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005388 }
5389 }
5390 }
5391
Frank Barcharde22685a2021-11-12 11:36:58 -08005392 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_gt_8_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005393 TEST_REQUIRES_ARM_NEON;
5394 for (uint32_t n = 9; n < 16; n++) {
5395 for (size_t k = 1; k <= 80; k += 17) {
5396 for (uint32_t m = 1; m <= 2; m++) {
5397 GemmMicrokernelTester()
5398 .mr(2)
5399 .nr(8)
5400 .kr(2)
5401 .sr(4)
5402 .m(m)
5403 .n(n)
5404 .k(k)
5405 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005406 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005407 }
5408 }
5409 }
5410 }
5411
Frank Barcharde22685a2021-11-12 11:36:58 -08005412 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_div_8) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005413 TEST_REQUIRES_ARM_NEON;
5414 for (uint32_t n = 16; n <= 24; n += 8) {
5415 for (size_t k = 1; k <= 80; k += 17) {
5416 GemmMicrokernelTester()
5417 .mr(2)
5418 .nr(8)
5419 .kr(2)
5420 .sr(4)
5421 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005422 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08005423 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005424 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005425 }
5426 }
5427 }
5428
Frank Barcharde22685a2021-11-12 11:36:58 -08005429 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_div_8_strided_cn) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005430 TEST_REQUIRES_ARM_NEON;
5431 for (uint32_t n = 16; n <= 24; n += 8) {
5432 for (size_t k = 1; k <= 80; k += 17) {
5433 GemmMicrokernelTester()
5434 .mr(2)
5435 .nr(8)
5436 .kr(2)
5437 .sr(4)
5438 .m(2)
5439 .n(n)
5440 .k(k)
5441 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005442 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005443 }
5444 }
5445 }
5446
Frank Barcharde22685a2021-11-12 11:36:58 -08005447 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_div_8_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005448 TEST_REQUIRES_ARM_NEON;
5449 for (uint32_t n = 16; n <= 24; n += 8) {
5450 for (size_t k = 1; k <= 80; k += 17) {
5451 for (uint32_t m = 1; m <= 2; m++) {
5452 GemmMicrokernelTester()
5453 .mr(2)
5454 .nr(8)
5455 .kr(2)
5456 .sr(4)
5457 .m(m)
5458 .n(n)
5459 .k(k)
5460 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005461 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005462 }
5463 }
5464 }
5465 }
5466
Frank Barcharde22685a2021-11-12 11:36:58 -08005467 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, small_kernel) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005468 TEST_REQUIRES_ARM_NEON;
5469 for (size_t k = 1; k <= 80; k += 17) {
5470 GemmMicrokernelTester()
5471 .mr(2)
5472 .nr(8)
5473 .kr(2)
5474 .sr(4)
5475 .m(2)
5476 .n(8)
5477 .k(k)
5478 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005479 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005480 }
5481 }
5482
Frank Barcharde22685a2021-11-12 11:36:58 -08005483 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, small_kernel_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005484 TEST_REQUIRES_ARM_NEON;
5485 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005486 for (uint32_t n = 1; n <= 8; n++) {
5487 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005488 GemmMicrokernelTester()
5489 .mr(2)
5490 .nr(8)
5491 .kr(2)
5492 .sr(4)
5493 .m(m)
5494 .n(n)
5495 .k(k)
5496 .ks(3)
5497 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005498 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005499 }
5500 }
5501 }
5502 }
5503
Frank Barcharde22685a2021-11-12 11:36:58 -08005504 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_gt_8_small_kernel) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005505 TEST_REQUIRES_ARM_NEON;
5506 for (uint32_t n = 9; n < 16; n++) {
5507 for (size_t k = 1; k <= 80; k += 17) {
5508 GemmMicrokernelTester()
5509 .mr(2)
5510 .nr(8)
5511 .kr(2)
5512 .sr(4)
5513 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005514 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08005515 .k(k)
5516 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005517 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005518 }
5519 }
5520 }
5521
Frank Barcharde22685a2021-11-12 11:36:58 -08005522 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_div_8_small_kernel) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005523 TEST_REQUIRES_ARM_NEON;
5524 for (uint32_t n = 16; n <= 24; n += 8) {
5525 for (size_t k = 1; k <= 80; k += 17) {
5526 GemmMicrokernelTester()
5527 .mr(2)
5528 .nr(8)
5529 .kr(2)
5530 .sr(4)
5531 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005532 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08005533 .k(k)
5534 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005535 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005536 }
5537 }
5538 }
5539
Frank Barcharde22685a2021-11-12 11:36:58 -08005540 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, strided_cm_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005541 TEST_REQUIRES_ARM_NEON;
5542 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005543 for (uint32_t n = 1; n <= 8; n++) {
5544 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005545 GemmMicrokernelTester()
5546 .mr(2)
5547 .nr(8)
5548 .kr(2)
5549 .sr(4)
5550 .m(m)
5551 .n(n)
5552 .k(k)
5553 .cm_stride(11)
5554 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005555 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005556 }
5557 }
5558 }
5559 }
5560
Frank Barcharde22685a2021-11-12 11:36:58 -08005561 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, a_offset) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005562 TEST_REQUIRES_ARM_NEON;
5563 for (size_t k = 1; k <= 80; k += 17) {
5564 GemmMicrokernelTester()
5565 .mr(2)
5566 .nr(8)
5567 .kr(2)
5568 .sr(4)
5569 .m(2)
5570 .n(8)
5571 .k(k)
5572 .ks(3)
5573 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08005574 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005575 }
5576 }
5577
Frank Barcharde22685a2021-11-12 11:36:58 -08005578 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, zero) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005579 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005580 for (size_t k = 1; k <= 80; k += 17) {
5581 for (uint32_t mz = 0; mz < 2; mz++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005582 GemmMicrokernelTester()
5583 .mr(2)
5584 .nr(8)
5585 .kr(2)
5586 .sr(4)
5587 .m(2)
5588 .n(8)
5589 .k(k)
5590 .ks(3)
5591 .a_offset(163)
5592 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08005593 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005594 }
5595 }
5596 }
5597
Frank Barcharde22685a2021-11-12 11:36:58 -08005598 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, qmin) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005599 TEST_REQUIRES_ARM_NEON;
5600 GemmMicrokernelTester()
5601 .mr(2)
5602 .nr(8)
5603 .kr(2)
5604 .sr(4)
5605 .m(2)
5606 .n(8)
5607 .k(16)
5608 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005609 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005610 }
5611
Frank Barcharde22685a2021-11-12 11:36:58 -08005612 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, qmax) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005613 TEST_REQUIRES_ARM_NEON;
5614 GemmMicrokernelTester()
5615 .mr(2)
5616 .nr(8)
5617 .kr(2)
5618 .sr(4)
5619 .m(2)
5620 .n(8)
5621 .k(16)
5622 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005623 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005624 }
5625
Frank Barcharde22685a2021-11-12 11:36:58 -08005626 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, strided_cm) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005627 TEST_REQUIRES_ARM_NEON;
5628 GemmMicrokernelTester()
5629 .mr(2)
5630 .nr(8)
5631 .kr(2)
5632 .sr(4)
5633 .m(2)
5634 .n(8)
5635 .k(16)
5636 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005637 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005638 }
5639#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5640
5641
5642#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharde22685a2021-11-12 11:36:58 -08005643 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_eq_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005644 TEST_REQUIRES_ARM_NEON_V8;
5645 GemmMicrokernelTester()
5646 .mr(1)
5647 .nr(8)
5648 .kr(2)
5649 .sr(4)
5650 .m(1)
5651 .n(8)
5652 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08005653 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005654 }
5655
Frank Barcharde22685a2021-11-12 11:36:58 -08005656 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, strided_cn) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005657 TEST_REQUIRES_ARM_NEON_V8;
5658 GemmMicrokernelTester()
5659 .mr(1)
5660 .nr(8)
5661 .kr(2)
5662 .sr(4)
5663 .m(1)
5664 .n(8)
5665 .k(16)
5666 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005667 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005668 }
5669
Frank Barcharde22685a2021-11-12 11:36:58 -08005670 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_eq_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005671 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005672 for (uint32_t n = 1; n <= 8; n++) {
5673 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005674 GemmMicrokernelTester()
5675 .mr(1)
5676 .nr(8)
5677 .kr(2)
5678 .sr(4)
5679 .m(m)
5680 .n(n)
5681 .k(16)
5682 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005683 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005684 }
5685 }
5686 }
5687
Frank Barcharde22685a2021-11-12 11:36:58 -08005688 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_eq_16_subtile_m) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005689 TEST_REQUIRES_ARM_NEON_V8;
5690 for (uint32_t m = 1; m <= 1; m++) {
5691 GemmMicrokernelTester()
5692 .mr(1)
5693 .nr(8)
5694 .kr(2)
5695 .sr(4)
5696 .m(m)
5697 .n(8)
5698 .k(16)
5699 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005700 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005701 }
5702 }
5703
Frank Barcharde22685a2021-11-12 11:36:58 -08005704 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_eq_16_subtile_n) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005705 TEST_REQUIRES_ARM_NEON_V8;
5706 for (uint32_t n = 1; n <= 8; n++) {
5707 GemmMicrokernelTester()
5708 .mr(1)
5709 .nr(8)
5710 .kr(2)
5711 .sr(4)
5712 .m(1)
5713 .n(n)
5714 .k(16)
5715 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005716 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005717 }
5718 }
5719
Frank Barcharde22685a2021-11-12 11:36:58 -08005720 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_lt_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005721 TEST_REQUIRES_ARM_NEON_V8;
5722 for (size_t k = 1; k < 16; k++) {
5723 GemmMicrokernelTester()
5724 .mr(1)
5725 .nr(8)
5726 .kr(2)
5727 .sr(4)
5728 .m(1)
5729 .n(8)
5730 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005731 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005732 }
5733 }
5734
Frank Barcharde22685a2021-11-12 11:36:58 -08005735 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_lt_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005736 TEST_REQUIRES_ARM_NEON_V8;
5737 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005738 for (uint32_t n = 1; n <= 8; n++) {
5739 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005740 GemmMicrokernelTester()
5741 .mr(1)
5742 .nr(8)
5743 .kr(2)
5744 .sr(4)
5745 .m(m)
5746 .n(n)
5747 .k(k)
5748 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005749 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005750 }
5751 }
5752 }
5753 }
5754
Frank Barcharde22685a2021-11-12 11:36:58 -08005755 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_gt_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005756 TEST_REQUIRES_ARM_NEON_V8;
5757 for (size_t k = 17; k < 32; k++) {
5758 GemmMicrokernelTester()
5759 .mr(1)
5760 .nr(8)
5761 .kr(2)
5762 .sr(4)
5763 .m(1)
5764 .n(8)
5765 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005766 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005767 }
5768 }
5769
Frank Barcharde22685a2021-11-12 11:36:58 -08005770 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_gt_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005771 TEST_REQUIRES_ARM_NEON_V8;
5772 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005773 for (uint32_t n = 1; n <= 8; n++) {
5774 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005775 GemmMicrokernelTester()
5776 .mr(1)
5777 .nr(8)
5778 .kr(2)
5779 .sr(4)
5780 .m(m)
5781 .n(n)
5782 .k(k)
5783 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005784 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005785 }
5786 }
5787 }
5788 }
5789
Frank Barcharde22685a2021-11-12 11:36:58 -08005790 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_div_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005791 TEST_REQUIRES_ARM_NEON_V8;
5792 for (size_t k = 32; k <= 160; k += 16) {
5793 GemmMicrokernelTester()
5794 .mr(1)
5795 .nr(8)
5796 .kr(2)
5797 .sr(4)
5798 .m(1)
5799 .n(8)
5800 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005801 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005802 }
5803 }
5804
Frank Barcharde22685a2021-11-12 11:36:58 -08005805 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_div_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005806 TEST_REQUIRES_ARM_NEON_V8;
5807 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005808 for (uint32_t n = 1; n <= 8; n++) {
5809 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005810 GemmMicrokernelTester()
5811 .mr(1)
5812 .nr(8)
5813 .kr(2)
5814 .sr(4)
5815 .m(m)
5816 .n(n)
5817 .k(k)
5818 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005819 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005820 }
5821 }
5822 }
5823 }
5824
Frank Barcharde22685a2021-11-12 11:36:58 -08005825 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_gt_8) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005826 TEST_REQUIRES_ARM_NEON_V8;
5827 for (uint32_t n = 9; n < 16; n++) {
5828 for (size_t k = 1; k <= 80; k += 17) {
5829 GemmMicrokernelTester()
5830 .mr(1)
5831 .nr(8)
5832 .kr(2)
5833 .sr(4)
5834 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005835 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08005836 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005837 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005838 }
5839 }
5840 }
5841
Frank Barcharde22685a2021-11-12 11:36:58 -08005842 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_gt_8_strided_cn) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005843 TEST_REQUIRES_ARM_NEON_V8;
5844 for (uint32_t n = 9; n < 16; n++) {
5845 for (size_t k = 1; k <= 80; k += 17) {
5846 GemmMicrokernelTester()
5847 .mr(1)
5848 .nr(8)
5849 .kr(2)
5850 .sr(4)
5851 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005852 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08005853 .k(k)
5854 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005855 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005856 }
5857 }
5858 }
5859
Frank Barcharde22685a2021-11-12 11:36:58 -08005860 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_gt_8_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005861 TEST_REQUIRES_ARM_NEON_V8;
5862 for (uint32_t n = 9; n < 16; n++) {
5863 for (size_t k = 1; k <= 80; k += 17) {
5864 for (uint32_t m = 1; m <= 1; m++) {
5865 GemmMicrokernelTester()
5866 .mr(1)
5867 .nr(8)
5868 .kr(2)
5869 .sr(4)
5870 .m(m)
5871 .n(n)
5872 .k(k)
5873 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005874 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005875 }
5876 }
5877 }
5878 }
5879
Frank Barcharde22685a2021-11-12 11:36:58 -08005880 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_div_8) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005881 TEST_REQUIRES_ARM_NEON_V8;
5882 for (uint32_t n = 16; n <= 24; n += 8) {
5883 for (size_t k = 1; k <= 80; k += 17) {
5884 GemmMicrokernelTester()
5885 .mr(1)
5886 .nr(8)
5887 .kr(2)
5888 .sr(4)
5889 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005890 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08005891 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005892 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005893 }
5894 }
5895 }
5896
Frank Barcharde22685a2021-11-12 11:36:58 -08005897 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_div_8_strided_cn) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005898 TEST_REQUIRES_ARM_NEON_V8;
5899 for (uint32_t n = 16; n <= 24; n += 8) {
5900 for (size_t k = 1; k <= 80; k += 17) {
5901 GemmMicrokernelTester()
5902 .mr(1)
5903 .nr(8)
5904 .kr(2)
5905 .sr(4)
5906 .m(1)
5907 .n(n)
5908 .k(k)
5909 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005910 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005911 }
5912 }
5913 }
5914
Frank Barcharde22685a2021-11-12 11:36:58 -08005915 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_div_8_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005916 TEST_REQUIRES_ARM_NEON_V8;
5917 for (uint32_t n = 16; n <= 24; n += 8) {
5918 for (size_t k = 1; k <= 80; k += 17) {
5919 for (uint32_t m = 1; m <= 1; m++) {
5920 GemmMicrokernelTester()
5921 .mr(1)
5922 .nr(8)
5923 .kr(2)
5924 .sr(4)
5925 .m(m)
5926 .n(n)
5927 .k(k)
5928 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005929 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005930 }
5931 }
5932 }
5933 }
5934
Frank Barcharde22685a2021-11-12 11:36:58 -08005935 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, small_kernel) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005936 TEST_REQUIRES_ARM_NEON_V8;
5937 for (size_t k = 1; k <= 80; k += 17) {
5938 GemmMicrokernelTester()
5939 .mr(1)
5940 .nr(8)
5941 .kr(2)
5942 .sr(4)
5943 .m(1)
5944 .n(8)
5945 .k(k)
5946 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005947 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005948 }
5949 }
5950
Frank Barcharde22685a2021-11-12 11:36:58 -08005951 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, small_kernel_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005952 TEST_REQUIRES_ARM_NEON_V8;
5953 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005954 for (uint32_t n = 1; n <= 8; n++) {
5955 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005956 GemmMicrokernelTester()
5957 .mr(1)
5958 .nr(8)
5959 .kr(2)
5960 .sr(4)
5961 .m(m)
5962 .n(n)
5963 .k(k)
5964 .ks(3)
5965 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005966 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005967 }
5968 }
5969 }
5970 }
5971
Frank Barcharde22685a2021-11-12 11:36:58 -08005972 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_gt_8_small_kernel) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005973 TEST_REQUIRES_ARM_NEON_V8;
5974 for (uint32_t n = 9; n < 16; n++) {
5975 for (size_t k = 1; k <= 80; k += 17) {
5976 GemmMicrokernelTester()
5977 .mr(1)
5978 .nr(8)
5979 .kr(2)
5980 .sr(4)
5981 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005982 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08005983 .k(k)
5984 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005985 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08005986 }
5987 }
5988 }
5989
Frank Barcharde22685a2021-11-12 11:36:58 -08005990 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_div_8_small_kernel) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08005991 TEST_REQUIRES_ARM_NEON_V8;
5992 for (uint32_t n = 16; n <= 24; n += 8) {
5993 for (size_t k = 1; k <= 80; k += 17) {
5994 GemmMicrokernelTester()
5995 .mr(1)
5996 .nr(8)
5997 .kr(2)
5998 .sr(4)
5999 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006000 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08006001 .k(k)
6002 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006003 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006004 }
6005 }
6006 }
6007
Frank Barcharde22685a2021-11-12 11:36:58 -08006008 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, strided_cm_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006009 TEST_REQUIRES_ARM_NEON_V8;
6010 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006011 for (uint32_t n = 1; n <= 8; n++) {
6012 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006013 GemmMicrokernelTester()
6014 .mr(1)
6015 .nr(8)
6016 .kr(2)
6017 .sr(4)
6018 .m(m)
6019 .n(n)
6020 .k(k)
6021 .cm_stride(11)
6022 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006023 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006024 }
6025 }
6026 }
6027 }
6028
Frank Barcharde22685a2021-11-12 11:36:58 -08006029 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, a_offset) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006030 TEST_REQUIRES_ARM_NEON_V8;
6031 for (size_t k = 1; k <= 80; k += 17) {
6032 GemmMicrokernelTester()
6033 .mr(1)
6034 .nr(8)
6035 .kr(2)
6036 .sr(4)
6037 .m(1)
6038 .n(8)
6039 .k(k)
6040 .ks(3)
6041 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08006042 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006043 }
6044 }
6045
Frank Barcharde22685a2021-11-12 11:36:58 -08006046 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, zero) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006047 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006048 for (size_t k = 1; k <= 80; k += 17) {
6049 for (uint32_t mz = 0; mz < 1; mz++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006050 GemmMicrokernelTester()
6051 .mr(1)
6052 .nr(8)
6053 .kr(2)
6054 .sr(4)
6055 .m(1)
6056 .n(8)
6057 .k(k)
6058 .ks(3)
6059 .a_offset(83)
6060 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08006061 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006062 }
6063 }
6064 }
6065
Frank Barcharde22685a2021-11-12 11:36:58 -08006066 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, qmin) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006067 TEST_REQUIRES_ARM_NEON_V8;
6068 GemmMicrokernelTester()
6069 .mr(1)
6070 .nr(8)
6071 .kr(2)
6072 .sr(4)
6073 .m(1)
6074 .n(8)
6075 .k(16)
6076 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006077 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006078 }
6079
Frank Barcharde22685a2021-11-12 11:36:58 -08006080 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, qmax) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006081 TEST_REQUIRES_ARM_NEON_V8;
6082 GemmMicrokernelTester()
6083 .mr(1)
6084 .nr(8)
6085 .kr(2)
6086 .sr(4)
6087 .m(1)
6088 .n(8)
6089 .k(16)
6090 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006091 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006092 }
6093
Frank Barcharde22685a2021-11-12 11:36:58 -08006094 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, strided_cm) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006095 TEST_REQUIRES_ARM_NEON_V8;
6096 GemmMicrokernelTester()
6097 .mr(1)
6098 .nr(8)
6099 .kr(2)
6100 .sr(4)
6101 .m(1)
6102 .n(8)
6103 .k(16)
6104 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006105 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006106 }
6107#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6108
6109
6110#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharde22685a2021-11-12 11:36:58 -08006111 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_eq_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006112 TEST_REQUIRES_ARM_NEON_V8;
6113 GemmMicrokernelTester()
6114 .mr(2)
6115 .nr(8)
6116 .kr(2)
6117 .sr(4)
6118 .m(2)
6119 .n(8)
6120 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08006121 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006122 }
6123
Frank Barcharde22685a2021-11-12 11:36:58 -08006124 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, strided_cn) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006125 TEST_REQUIRES_ARM_NEON_V8;
6126 GemmMicrokernelTester()
6127 .mr(2)
6128 .nr(8)
6129 .kr(2)
6130 .sr(4)
6131 .m(2)
6132 .n(8)
6133 .k(16)
6134 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006135 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006136 }
6137
Frank Barcharde22685a2021-11-12 11:36:58 -08006138 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_eq_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006139 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006140 for (uint32_t n = 1; n <= 8; n++) {
6141 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006142 GemmMicrokernelTester()
6143 .mr(2)
6144 .nr(8)
6145 .kr(2)
6146 .sr(4)
6147 .m(m)
6148 .n(n)
6149 .k(16)
6150 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006151 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006152 }
6153 }
6154 }
6155
Frank Barcharde22685a2021-11-12 11:36:58 -08006156 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_eq_16_subtile_m) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006157 TEST_REQUIRES_ARM_NEON_V8;
6158 for (uint32_t m = 1; m <= 2; m++) {
6159 GemmMicrokernelTester()
6160 .mr(2)
6161 .nr(8)
6162 .kr(2)
6163 .sr(4)
6164 .m(m)
6165 .n(8)
6166 .k(16)
6167 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006168 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006169 }
6170 }
6171
Frank Barcharde22685a2021-11-12 11:36:58 -08006172 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_eq_16_subtile_n) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006173 TEST_REQUIRES_ARM_NEON_V8;
6174 for (uint32_t n = 1; n <= 8; n++) {
6175 GemmMicrokernelTester()
6176 .mr(2)
6177 .nr(8)
6178 .kr(2)
6179 .sr(4)
6180 .m(2)
6181 .n(n)
6182 .k(16)
6183 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006184 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006185 }
6186 }
6187
Frank Barcharde22685a2021-11-12 11:36:58 -08006188 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_lt_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006189 TEST_REQUIRES_ARM_NEON_V8;
6190 for (size_t k = 1; k < 16; k++) {
6191 GemmMicrokernelTester()
6192 .mr(2)
6193 .nr(8)
6194 .kr(2)
6195 .sr(4)
6196 .m(2)
6197 .n(8)
6198 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006199 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006200 }
6201 }
6202
Frank Barcharde22685a2021-11-12 11:36:58 -08006203 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_lt_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006204 TEST_REQUIRES_ARM_NEON_V8;
6205 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006206 for (uint32_t n = 1; n <= 8; n++) {
6207 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006208 GemmMicrokernelTester()
6209 .mr(2)
6210 .nr(8)
6211 .kr(2)
6212 .sr(4)
6213 .m(m)
6214 .n(n)
6215 .k(k)
6216 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006217 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006218 }
6219 }
6220 }
6221 }
6222
Frank Barcharde22685a2021-11-12 11:36:58 -08006223 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_gt_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006224 TEST_REQUIRES_ARM_NEON_V8;
6225 for (size_t k = 17; k < 32; k++) {
6226 GemmMicrokernelTester()
6227 .mr(2)
6228 .nr(8)
6229 .kr(2)
6230 .sr(4)
6231 .m(2)
6232 .n(8)
6233 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006234 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006235 }
6236 }
6237
Frank Barcharde22685a2021-11-12 11:36:58 -08006238 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_gt_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006239 TEST_REQUIRES_ARM_NEON_V8;
6240 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006241 for (uint32_t n = 1; n <= 8; n++) {
6242 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006243 GemmMicrokernelTester()
6244 .mr(2)
6245 .nr(8)
6246 .kr(2)
6247 .sr(4)
6248 .m(m)
6249 .n(n)
6250 .k(k)
6251 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006252 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006253 }
6254 }
6255 }
6256 }
6257
Frank Barcharde22685a2021-11-12 11:36:58 -08006258 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_div_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006259 TEST_REQUIRES_ARM_NEON_V8;
6260 for (size_t k = 32; k <= 160; k += 16) {
6261 GemmMicrokernelTester()
6262 .mr(2)
6263 .nr(8)
6264 .kr(2)
6265 .sr(4)
6266 .m(2)
6267 .n(8)
6268 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006269 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006270 }
6271 }
6272
Frank Barcharde22685a2021-11-12 11:36:58 -08006273 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_div_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006274 TEST_REQUIRES_ARM_NEON_V8;
6275 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006276 for (uint32_t n = 1; n <= 8; n++) {
6277 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006278 GemmMicrokernelTester()
6279 .mr(2)
6280 .nr(8)
6281 .kr(2)
6282 .sr(4)
6283 .m(m)
6284 .n(n)
6285 .k(k)
6286 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006287 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006288 }
6289 }
6290 }
6291 }
6292
Frank Barcharde22685a2021-11-12 11:36:58 -08006293 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_gt_8) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006294 TEST_REQUIRES_ARM_NEON_V8;
6295 for (uint32_t n = 9; n < 16; n++) {
6296 for (size_t k = 1; k <= 80; k += 17) {
6297 GemmMicrokernelTester()
6298 .mr(2)
6299 .nr(8)
6300 .kr(2)
6301 .sr(4)
6302 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006303 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08006304 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006305 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006306 }
6307 }
6308 }
6309
Frank Barcharde22685a2021-11-12 11:36:58 -08006310 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_gt_8_strided_cn) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006311 TEST_REQUIRES_ARM_NEON_V8;
6312 for (uint32_t n = 9; n < 16; n++) {
6313 for (size_t k = 1; k <= 80; k += 17) {
6314 GemmMicrokernelTester()
6315 .mr(2)
6316 .nr(8)
6317 .kr(2)
6318 .sr(4)
6319 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006320 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08006321 .k(k)
6322 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006323 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006324 }
6325 }
6326 }
6327
Frank Barcharde22685a2021-11-12 11:36:58 -08006328 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_gt_8_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006329 TEST_REQUIRES_ARM_NEON_V8;
6330 for (uint32_t n = 9; n < 16; n++) {
6331 for (size_t k = 1; k <= 80; k += 17) {
6332 for (uint32_t m = 1; m <= 2; m++) {
6333 GemmMicrokernelTester()
6334 .mr(2)
6335 .nr(8)
6336 .kr(2)
6337 .sr(4)
6338 .m(m)
6339 .n(n)
6340 .k(k)
6341 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006342 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006343 }
6344 }
6345 }
6346 }
6347
Frank Barcharde22685a2021-11-12 11:36:58 -08006348 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_div_8) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006349 TEST_REQUIRES_ARM_NEON_V8;
6350 for (uint32_t n = 16; n <= 24; n += 8) {
6351 for (size_t k = 1; k <= 80; k += 17) {
6352 GemmMicrokernelTester()
6353 .mr(2)
6354 .nr(8)
6355 .kr(2)
6356 .sr(4)
6357 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006358 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08006359 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006360 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006361 }
6362 }
6363 }
6364
Frank Barcharde22685a2021-11-12 11:36:58 -08006365 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_div_8_strided_cn) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006366 TEST_REQUIRES_ARM_NEON_V8;
6367 for (uint32_t n = 16; n <= 24; n += 8) {
6368 for (size_t k = 1; k <= 80; k += 17) {
6369 GemmMicrokernelTester()
6370 .mr(2)
6371 .nr(8)
6372 .kr(2)
6373 .sr(4)
6374 .m(2)
6375 .n(n)
6376 .k(k)
6377 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006378 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006379 }
6380 }
6381 }
6382
Frank Barcharde22685a2021-11-12 11:36:58 -08006383 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_div_8_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006384 TEST_REQUIRES_ARM_NEON_V8;
6385 for (uint32_t n = 16; n <= 24; n += 8) {
6386 for (size_t k = 1; k <= 80; k += 17) {
6387 for (uint32_t m = 1; m <= 2; m++) {
6388 GemmMicrokernelTester()
6389 .mr(2)
6390 .nr(8)
6391 .kr(2)
6392 .sr(4)
6393 .m(m)
6394 .n(n)
6395 .k(k)
6396 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006397 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006398 }
6399 }
6400 }
6401 }
6402
Frank Barcharde22685a2021-11-12 11:36:58 -08006403 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, small_kernel) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006404 TEST_REQUIRES_ARM_NEON_V8;
6405 for (size_t k = 1; k <= 80; k += 17) {
6406 GemmMicrokernelTester()
6407 .mr(2)
6408 .nr(8)
6409 .kr(2)
6410 .sr(4)
6411 .m(2)
6412 .n(8)
6413 .k(k)
6414 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006415 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006416 }
6417 }
6418
Frank Barcharde22685a2021-11-12 11:36:58 -08006419 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, small_kernel_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006420 TEST_REQUIRES_ARM_NEON_V8;
6421 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006422 for (uint32_t n = 1; n <= 8; n++) {
6423 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006424 GemmMicrokernelTester()
6425 .mr(2)
6426 .nr(8)
6427 .kr(2)
6428 .sr(4)
6429 .m(m)
6430 .n(n)
6431 .k(k)
6432 .ks(3)
6433 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006434 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006435 }
6436 }
6437 }
6438 }
6439
Frank Barcharde22685a2021-11-12 11:36:58 -08006440 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_gt_8_small_kernel) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006441 TEST_REQUIRES_ARM_NEON_V8;
6442 for (uint32_t n = 9; n < 16; n++) {
6443 for (size_t k = 1; k <= 80; k += 17) {
6444 GemmMicrokernelTester()
6445 .mr(2)
6446 .nr(8)
6447 .kr(2)
6448 .sr(4)
6449 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006450 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08006451 .k(k)
6452 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006453 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006454 }
6455 }
6456 }
6457
Frank Barcharde22685a2021-11-12 11:36:58 -08006458 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_div_8_small_kernel) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006459 TEST_REQUIRES_ARM_NEON_V8;
6460 for (uint32_t n = 16; n <= 24; n += 8) {
6461 for (size_t k = 1; k <= 80; k += 17) {
6462 GemmMicrokernelTester()
6463 .mr(2)
6464 .nr(8)
6465 .kr(2)
6466 .sr(4)
6467 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006468 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08006469 .k(k)
6470 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006471 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006472 }
6473 }
6474 }
6475
Frank Barcharde22685a2021-11-12 11:36:58 -08006476 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, strided_cm_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006477 TEST_REQUIRES_ARM_NEON_V8;
6478 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006479 for (uint32_t n = 1; n <= 8; n++) {
6480 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006481 GemmMicrokernelTester()
6482 .mr(2)
6483 .nr(8)
6484 .kr(2)
6485 .sr(4)
6486 .m(m)
6487 .n(n)
6488 .k(k)
6489 .cm_stride(11)
6490 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006491 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006492 }
6493 }
6494 }
6495 }
6496
Frank Barcharde22685a2021-11-12 11:36:58 -08006497 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, a_offset) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006498 TEST_REQUIRES_ARM_NEON_V8;
6499 for (size_t k = 1; k <= 80; k += 17) {
6500 GemmMicrokernelTester()
6501 .mr(2)
6502 .nr(8)
6503 .kr(2)
6504 .sr(4)
6505 .m(2)
6506 .n(8)
6507 .k(k)
6508 .ks(3)
6509 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08006510 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006511 }
6512 }
6513
Frank Barcharde22685a2021-11-12 11:36:58 -08006514 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, zero) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006515 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006516 for (size_t k = 1; k <= 80; k += 17) {
6517 for (uint32_t mz = 0; mz < 2; mz++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006518 GemmMicrokernelTester()
6519 .mr(2)
6520 .nr(8)
6521 .kr(2)
6522 .sr(4)
6523 .m(2)
6524 .n(8)
6525 .k(k)
6526 .ks(3)
6527 .a_offset(163)
6528 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08006529 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006530 }
6531 }
6532 }
6533
Frank Barcharde22685a2021-11-12 11:36:58 -08006534 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, qmin) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006535 TEST_REQUIRES_ARM_NEON_V8;
6536 GemmMicrokernelTester()
6537 .mr(2)
6538 .nr(8)
6539 .kr(2)
6540 .sr(4)
6541 .m(2)
6542 .n(8)
6543 .k(16)
6544 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006545 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006546 }
6547
Frank Barcharde22685a2021-11-12 11:36:58 -08006548 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, qmax) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006549 TEST_REQUIRES_ARM_NEON_V8;
6550 GemmMicrokernelTester()
6551 .mr(2)
6552 .nr(8)
6553 .kr(2)
6554 .sr(4)
6555 .m(2)
6556 .n(8)
6557 .k(16)
6558 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006559 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006560 }
6561
Frank Barcharde22685a2021-11-12 11:36:58 -08006562 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, strided_cm) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08006563 TEST_REQUIRES_ARM_NEON_V8;
6564 GemmMicrokernelTester()
6565 .mr(2)
6566 .nr(8)
6567 .kr(2)
6568 .sr(4)
6569 .m(2)
6570 .n(8)
6571 .k(16)
6572 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006573 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchardc7a032d2021-11-10 12:37:49 -08006574 }
6575#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6576
6577
6578#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharde22685a2021-11-12 11:36:58 -08006579 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_eq_16) {
Frank Barchard287952a2021-11-03 15:26:45 -07006580 TEST_REQUIRES_ARM_NEON;
6581 GemmMicrokernelTester()
6582 .mr(1)
6583 .nr(8)
6584 .kr(4)
6585 .sr(1)
6586 .m(1)
6587 .n(8)
6588 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08006589 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07006590 }
6591
Frank Barcharde22685a2021-11-12 11:36:58 -08006592 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -07006593 TEST_REQUIRES_ARM_NEON;
6594 GemmMicrokernelTester()
6595 .mr(1)
6596 .nr(8)
6597 .kr(4)
6598 .sr(1)
6599 .m(1)
6600 .n(8)
6601 .k(16)
6602 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006603 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07006604 }
6605
Frank Barcharde22685a2021-11-12 11:36:58 -08006606 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_eq_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07006607 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006608 for (uint32_t n = 1; n <= 8; n++) {
6609 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07006610 GemmMicrokernelTester()
6611 .mr(1)
6612 .nr(8)
6613 .kr(4)
6614 .sr(1)
6615 .m(m)
6616 .n(n)
6617 .k(16)
6618 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006619 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07006620 }
6621 }
6622 }
6623
Frank Barcharde22685a2021-11-12 11:36:58 -08006624 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_eq_16_subtile_m) {
Frank Barchard287952a2021-11-03 15:26:45 -07006625 TEST_REQUIRES_ARM_NEON;
6626 for (uint32_t m = 1; m <= 1; m++) {
6627 GemmMicrokernelTester()
6628 .mr(1)
6629 .nr(8)
6630 .kr(4)
6631 .sr(1)
6632 .m(m)
6633 .n(8)
6634 .k(16)
6635 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006636 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07006637 }
6638 }
6639
Frank Barcharde22685a2021-11-12 11:36:58 -08006640 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_eq_16_subtile_n) {
Frank Barchard287952a2021-11-03 15:26:45 -07006641 TEST_REQUIRES_ARM_NEON;
6642 for (uint32_t n = 1; n <= 8; n++) {
6643 GemmMicrokernelTester()
6644 .mr(1)
6645 .nr(8)
6646 .kr(4)
6647 .sr(1)
6648 .m(1)
6649 .n(n)
6650 .k(16)
6651 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006652 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07006653 }
6654 }
6655
Frank Barcharde22685a2021-11-12 11:36:58 -08006656 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_lt_16) {
Frank Barchard287952a2021-11-03 15:26:45 -07006657 TEST_REQUIRES_ARM_NEON;
6658 for (size_t k = 1; k < 16; k++) {
6659 GemmMicrokernelTester()
6660 .mr(1)
6661 .nr(8)
6662 .kr(4)
6663 .sr(1)
6664 .m(1)
6665 .n(8)
6666 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006667 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07006668 }
6669 }
6670
Frank Barcharde22685a2021-11-12 11:36:58 -08006671 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_lt_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07006672 TEST_REQUIRES_ARM_NEON;
6673 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006674 for (uint32_t n = 1; n <= 8; n++) {
6675 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07006676 GemmMicrokernelTester()
6677 .mr(1)
6678 .nr(8)
6679 .kr(4)
6680 .sr(1)
6681 .m(m)
6682 .n(n)
6683 .k(k)
6684 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006685 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07006686 }
6687 }
6688 }
6689 }
6690
Frank Barcharde22685a2021-11-12 11:36:58 -08006691 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_gt_16) {
Frank Barchard287952a2021-11-03 15:26:45 -07006692 TEST_REQUIRES_ARM_NEON;
6693 for (size_t k = 17; k < 32; k++) {
6694 GemmMicrokernelTester()
6695 .mr(1)
6696 .nr(8)
6697 .kr(4)
6698 .sr(1)
6699 .m(1)
6700 .n(8)
6701 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006702 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07006703 }
6704 }
6705
Frank Barcharde22685a2021-11-12 11:36:58 -08006706 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_gt_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07006707 TEST_REQUIRES_ARM_NEON;
6708 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006709 for (uint32_t n = 1; n <= 8; n++) {
6710 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07006711 GemmMicrokernelTester()
6712 .mr(1)
6713 .nr(8)
6714 .kr(4)
6715 .sr(1)
6716 .m(m)
6717 .n(n)
6718 .k(k)
6719 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006720 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07006721 }
6722 }
6723 }
6724 }
6725
Frank Barcharde22685a2021-11-12 11:36:58 -08006726 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_div_16) {
Frank Barchard287952a2021-11-03 15:26:45 -07006727 TEST_REQUIRES_ARM_NEON;
6728 for (size_t k = 32; k <= 160; k += 16) {
6729 GemmMicrokernelTester()
6730 .mr(1)
6731 .nr(8)
6732 .kr(4)
6733 .sr(1)
6734 .m(1)
6735 .n(8)
6736 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006737 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07006738 }
6739 }
6740
Frank Barcharde22685a2021-11-12 11:36:58 -08006741 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_div_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07006742 TEST_REQUIRES_ARM_NEON;
6743 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006744 for (uint32_t n = 1; n <= 8; n++) {
6745 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07006746 GemmMicrokernelTester()
6747 .mr(1)
6748 .nr(8)
6749 .kr(4)
6750 .sr(1)
6751 .m(m)
6752 .n(n)
6753 .k(k)
6754 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006755 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07006756 }
6757 }
6758 }
6759 }
6760
Frank Barcharde22685a2021-11-12 11:36:58 -08006761 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_gt_8) {
Frank Barchard287952a2021-11-03 15:26:45 -07006762 TEST_REQUIRES_ARM_NEON;
6763 for (uint32_t n = 9; n < 16; n++) {
6764 for (size_t k = 1; k <= 80; k += 17) {
6765 GemmMicrokernelTester()
6766 .mr(1)
6767 .nr(8)
6768 .kr(4)
6769 .sr(1)
6770 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006771 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -07006772 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006773 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07006774 }
6775 }
6776 }
6777
Frank Barcharde22685a2021-11-12 11:36:58 -08006778 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_gt_8_strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -07006779 TEST_REQUIRES_ARM_NEON;
6780 for (uint32_t n = 9; n < 16; n++) {
6781 for (size_t k = 1; k <= 80; k += 17) {
6782 GemmMicrokernelTester()
6783 .mr(1)
6784 .nr(8)
6785 .kr(4)
6786 .sr(1)
6787 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006788 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -07006789 .k(k)
6790 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006791 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07006792 }
6793 }
6794 }
6795
Frank Barcharde22685a2021-11-12 11:36:58 -08006796 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_gt_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07006797 TEST_REQUIRES_ARM_NEON;
6798 for (uint32_t n = 9; n < 16; n++) {
6799 for (size_t k = 1; k <= 80; k += 17) {
6800 for (uint32_t m = 1; m <= 1; m++) {
6801 GemmMicrokernelTester()
6802 .mr(1)
6803 .nr(8)
6804 .kr(4)
6805 .sr(1)
6806 .m(m)
6807 .n(n)
6808 .k(k)
6809 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006810 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07006811 }
6812 }
6813 }
6814 }
6815
Frank Barcharde22685a2021-11-12 11:36:58 -08006816 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_div_8) {
Frank Barchard287952a2021-11-03 15:26:45 -07006817 TEST_REQUIRES_ARM_NEON;
6818 for (uint32_t n = 16; n <= 24; n += 8) {
6819 for (size_t k = 1; k <= 80; k += 17) {
6820 GemmMicrokernelTester()
6821 .mr(1)
6822 .nr(8)
6823 .kr(4)
6824 .sr(1)
6825 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006826 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -07006827 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006828 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07006829 }
6830 }
6831 }
6832
Frank Barcharde22685a2021-11-12 11:36:58 -08006833 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_div_8_strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -07006834 TEST_REQUIRES_ARM_NEON;
6835 for (uint32_t n = 16; n <= 24; n += 8) {
6836 for (size_t k = 1; k <= 80; k += 17) {
6837 GemmMicrokernelTester()
6838 .mr(1)
6839 .nr(8)
6840 .kr(4)
6841 .sr(1)
6842 .m(1)
6843 .n(n)
6844 .k(k)
6845 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006846 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07006847 }
6848 }
6849 }
6850
Frank Barcharde22685a2021-11-12 11:36:58 -08006851 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_div_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07006852 TEST_REQUIRES_ARM_NEON;
6853 for (uint32_t n = 16; n <= 24; n += 8) {
6854 for (size_t k = 1; k <= 80; k += 17) {
6855 for (uint32_t m = 1; m <= 1; m++) {
6856 GemmMicrokernelTester()
6857 .mr(1)
6858 .nr(8)
6859 .kr(4)
6860 .sr(1)
6861 .m(m)
6862 .n(n)
6863 .k(k)
6864 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006865 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07006866 }
6867 }
6868 }
6869 }
6870
Frank Barcharde22685a2021-11-12 11:36:58 -08006871 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -07006872 TEST_REQUIRES_ARM_NEON;
6873 for (size_t k = 1; k <= 80; k += 17) {
6874 GemmMicrokernelTester()
6875 .mr(1)
6876 .nr(8)
6877 .kr(4)
6878 .sr(1)
6879 .m(1)
6880 .n(8)
6881 .k(k)
6882 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006883 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07006884 }
6885 }
6886
Frank Barcharde22685a2021-11-12 11:36:58 -08006887 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, small_kernel_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07006888 TEST_REQUIRES_ARM_NEON;
6889 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006890 for (uint32_t n = 1; n <= 8; n++) {
6891 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07006892 GemmMicrokernelTester()
6893 .mr(1)
6894 .nr(8)
6895 .kr(4)
6896 .sr(1)
6897 .m(m)
6898 .n(n)
6899 .k(k)
6900 .ks(3)
6901 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006902 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07006903 }
6904 }
6905 }
6906 }
6907
Frank Barcharde22685a2021-11-12 11:36:58 -08006908 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_gt_8_small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -07006909 TEST_REQUIRES_ARM_NEON;
6910 for (uint32_t n = 9; n < 16; n++) {
6911 for (size_t k = 1; k <= 80; k += 17) {
6912 GemmMicrokernelTester()
6913 .mr(1)
6914 .nr(8)
6915 .kr(4)
6916 .sr(1)
6917 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006918 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -07006919 .k(k)
6920 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006921 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07006922 }
6923 }
6924 }
6925
Frank Barcharde22685a2021-11-12 11:36:58 -08006926 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_div_8_small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -07006927 TEST_REQUIRES_ARM_NEON;
6928 for (uint32_t n = 16; n <= 24; n += 8) {
6929 for (size_t k = 1; k <= 80; k += 17) {
6930 GemmMicrokernelTester()
6931 .mr(1)
6932 .nr(8)
6933 .kr(4)
6934 .sr(1)
6935 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006936 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -07006937 .k(k)
6938 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006939 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07006940 }
6941 }
6942 }
6943
Frank Barcharde22685a2021-11-12 11:36:58 -08006944 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, strided_cm_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07006945 TEST_REQUIRES_ARM_NEON;
6946 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006947 for (uint32_t n = 1; n <= 8; n++) {
6948 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07006949 GemmMicrokernelTester()
6950 .mr(1)
6951 .nr(8)
6952 .kr(4)
6953 .sr(1)
6954 .m(m)
6955 .n(n)
6956 .k(k)
6957 .cm_stride(11)
6958 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006959 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07006960 }
6961 }
6962 }
6963 }
6964
Frank Barcharde22685a2021-11-12 11:36:58 -08006965 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, a_offset) {
Frank Barchard287952a2021-11-03 15:26:45 -07006966 TEST_REQUIRES_ARM_NEON;
6967 for (size_t k = 1; k <= 80; k += 17) {
6968 GemmMicrokernelTester()
6969 .mr(1)
6970 .nr(8)
6971 .kr(4)
6972 .sr(1)
6973 .m(1)
6974 .n(8)
6975 .k(k)
6976 .ks(3)
6977 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08006978 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07006979 }
6980 }
6981
Frank Barcharde22685a2021-11-12 11:36:58 -08006982 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, zero) {
Frank Barchard287952a2021-11-03 15:26:45 -07006983 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006984 for (size_t k = 1; k <= 80; k += 17) {
6985 for (uint32_t mz = 0; mz < 1; mz++) {
Frank Barchard287952a2021-11-03 15:26:45 -07006986 GemmMicrokernelTester()
6987 .mr(1)
6988 .nr(8)
6989 .kr(4)
6990 .sr(1)
6991 .m(1)
6992 .n(8)
6993 .k(k)
6994 .ks(3)
6995 .a_offset(83)
6996 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08006997 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07006998 }
6999 }
7000 }
7001
Frank Barcharde22685a2021-11-12 11:36:58 -08007002 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, qmin) {
Frank Barchard287952a2021-11-03 15:26:45 -07007003 TEST_REQUIRES_ARM_NEON;
7004 GemmMicrokernelTester()
7005 .mr(1)
7006 .nr(8)
7007 .kr(4)
7008 .sr(1)
7009 .m(1)
7010 .n(8)
7011 .k(16)
7012 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007013 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007014 }
7015
Frank Barcharde22685a2021-11-12 11:36:58 -08007016 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, qmax) {
Frank Barchard287952a2021-11-03 15:26:45 -07007017 TEST_REQUIRES_ARM_NEON;
7018 GemmMicrokernelTester()
7019 .mr(1)
7020 .nr(8)
7021 .kr(4)
7022 .sr(1)
7023 .m(1)
7024 .n(8)
7025 .k(16)
7026 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007027 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007028 }
7029
Frank Barcharde22685a2021-11-12 11:36:58 -08007030 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, strided_cm) {
Frank Barchard287952a2021-11-03 15:26:45 -07007031 TEST_REQUIRES_ARM_NEON;
7032 GemmMicrokernelTester()
7033 .mr(1)
7034 .nr(8)
7035 .kr(4)
7036 .sr(1)
7037 .m(1)
7038 .n(8)
7039 .k(16)
7040 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007041 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007042 }
7043#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7044
7045
7046#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharde22685a2021-11-12 11:36:58 -08007047 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_eq_16) {
Frank Barchard287952a2021-11-03 15:26:45 -07007048 TEST_REQUIRES_ARM_NEON;
7049 GemmMicrokernelTester()
7050 .mr(2)
7051 .nr(8)
7052 .kr(4)
7053 .sr(1)
7054 .m(2)
7055 .n(8)
7056 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08007057 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007058 }
7059
Frank Barcharde22685a2021-11-12 11:36:58 -08007060 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -07007061 TEST_REQUIRES_ARM_NEON;
7062 GemmMicrokernelTester()
7063 .mr(2)
7064 .nr(8)
7065 .kr(4)
7066 .sr(1)
7067 .m(2)
7068 .n(8)
7069 .k(16)
7070 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007071 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007072 }
7073
Frank Barcharde22685a2021-11-12 11:36:58 -08007074 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_eq_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07007075 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007076 for (uint32_t n = 1; n <= 8; n++) {
7077 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07007078 GemmMicrokernelTester()
7079 .mr(2)
7080 .nr(8)
7081 .kr(4)
7082 .sr(1)
7083 .m(m)
7084 .n(n)
7085 .k(16)
7086 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007087 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007088 }
7089 }
7090 }
7091
Frank Barcharde22685a2021-11-12 11:36:58 -08007092 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_eq_16_subtile_m) {
Frank Barchard287952a2021-11-03 15:26:45 -07007093 TEST_REQUIRES_ARM_NEON;
7094 for (uint32_t m = 1; m <= 2; m++) {
7095 GemmMicrokernelTester()
7096 .mr(2)
7097 .nr(8)
7098 .kr(4)
7099 .sr(1)
7100 .m(m)
7101 .n(8)
7102 .k(16)
7103 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007104 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007105 }
7106 }
7107
Frank Barcharde22685a2021-11-12 11:36:58 -08007108 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_eq_16_subtile_n) {
Frank Barchard287952a2021-11-03 15:26:45 -07007109 TEST_REQUIRES_ARM_NEON;
7110 for (uint32_t n = 1; n <= 8; n++) {
7111 GemmMicrokernelTester()
7112 .mr(2)
7113 .nr(8)
7114 .kr(4)
7115 .sr(1)
7116 .m(2)
7117 .n(n)
7118 .k(16)
7119 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007120 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007121 }
7122 }
7123
Frank Barcharde22685a2021-11-12 11:36:58 -08007124 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_lt_16) {
Frank Barchard287952a2021-11-03 15:26:45 -07007125 TEST_REQUIRES_ARM_NEON;
7126 for (size_t k = 1; k < 16; k++) {
7127 GemmMicrokernelTester()
7128 .mr(2)
7129 .nr(8)
7130 .kr(4)
7131 .sr(1)
7132 .m(2)
7133 .n(8)
7134 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007135 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007136 }
7137 }
7138
Frank Barcharde22685a2021-11-12 11:36:58 -08007139 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_lt_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07007140 TEST_REQUIRES_ARM_NEON;
7141 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007142 for (uint32_t n = 1; n <= 8; n++) {
7143 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07007144 GemmMicrokernelTester()
7145 .mr(2)
7146 .nr(8)
7147 .kr(4)
7148 .sr(1)
7149 .m(m)
7150 .n(n)
7151 .k(k)
7152 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007153 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007154 }
7155 }
7156 }
7157 }
7158
Frank Barcharde22685a2021-11-12 11:36:58 -08007159 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_gt_16) {
Frank Barchard287952a2021-11-03 15:26:45 -07007160 TEST_REQUIRES_ARM_NEON;
7161 for (size_t k = 17; k < 32; k++) {
7162 GemmMicrokernelTester()
7163 .mr(2)
7164 .nr(8)
7165 .kr(4)
7166 .sr(1)
7167 .m(2)
7168 .n(8)
7169 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007170 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007171 }
7172 }
7173
Frank Barcharde22685a2021-11-12 11:36:58 -08007174 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_gt_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07007175 TEST_REQUIRES_ARM_NEON;
7176 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007177 for (uint32_t n = 1; n <= 8; n++) {
7178 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07007179 GemmMicrokernelTester()
7180 .mr(2)
7181 .nr(8)
7182 .kr(4)
7183 .sr(1)
7184 .m(m)
7185 .n(n)
7186 .k(k)
7187 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007188 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007189 }
7190 }
7191 }
7192 }
7193
Frank Barcharde22685a2021-11-12 11:36:58 -08007194 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_div_16) {
Frank Barchard287952a2021-11-03 15:26:45 -07007195 TEST_REQUIRES_ARM_NEON;
7196 for (size_t k = 32; k <= 160; k += 16) {
7197 GemmMicrokernelTester()
7198 .mr(2)
7199 .nr(8)
7200 .kr(4)
7201 .sr(1)
7202 .m(2)
7203 .n(8)
7204 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007205 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007206 }
7207 }
7208
Frank Barcharde22685a2021-11-12 11:36:58 -08007209 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_div_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07007210 TEST_REQUIRES_ARM_NEON;
7211 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007212 for (uint32_t n = 1; n <= 8; n++) {
7213 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07007214 GemmMicrokernelTester()
7215 .mr(2)
7216 .nr(8)
7217 .kr(4)
7218 .sr(1)
7219 .m(m)
7220 .n(n)
7221 .k(k)
7222 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007223 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007224 }
7225 }
7226 }
7227 }
7228
Frank Barcharde22685a2021-11-12 11:36:58 -08007229 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_gt_8) {
Frank Barchard287952a2021-11-03 15:26:45 -07007230 TEST_REQUIRES_ARM_NEON;
7231 for (uint32_t n = 9; n < 16; n++) {
7232 for (size_t k = 1; k <= 80; k += 17) {
7233 GemmMicrokernelTester()
7234 .mr(2)
7235 .nr(8)
7236 .kr(4)
7237 .sr(1)
7238 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007239 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -07007240 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007241 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007242 }
7243 }
7244 }
7245
Frank Barcharde22685a2021-11-12 11:36:58 -08007246 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_gt_8_strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -07007247 TEST_REQUIRES_ARM_NEON;
7248 for (uint32_t n = 9; n < 16; n++) {
7249 for (size_t k = 1; k <= 80; k += 17) {
7250 GemmMicrokernelTester()
7251 .mr(2)
7252 .nr(8)
7253 .kr(4)
7254 .sr(1)
7255 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007256 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -07007257 .k(k)
7258 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007259 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007260 }
7261 }
7262 }
7263
Frank Barcharde22685a2021-11-12 11:36:58 -08007264 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_gt_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07007265 TEST_REQUIRES_ARM_NEON;
7266 for (uint32_t n = 9; n < 16; n++) {
7267 for (size_t k = 1; k <= 80; k += 17) {
7268 for (uint32_t m = 1; m <= 2; m++) {
7269 GemmMicrokernelTester()
7270 .mr(2)
7271 .nr(8)
7272 .kr(4)
7273 .sr(1)
7274 .m(m)
7275 .n(n)
7276 .k(k)
7277 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007278 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007279 }
7280 }
7281 }
7282 }
7283
Frank Barcharde22685a2021-11-12 11:36:58 -08007284 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_div_8) {
Frank Barchard287952a2021-11-03 15:26:45 -07007285 TEST_REQUIRES_ARM_NEON;
7286 for (uint32_t n = 16; n <= 24; n += 8) {
7287 for (size_t k = 1; k <= 80; k += 17) {
7288 GemmMicrokernelTester()
7289 .mr(2)
7290 .nr(8)
7291 .kr(4)
7292 .sr(1)
7293 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007294 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -07007295 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007296 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007297 }
7298 }
7299 }
7300
Frank Barcharde22685a2021-11-12 11:36:58 -08007301 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_div_8_strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -07007302 TEST_REQUIRES_ARM_NEON;
7303 for (uint32_t n = 16; n <= 24; n += 8) {
7304 for (size_t k = 1; k <= 80; k += 17) {
7305 GemmMicrokernelTester()
7306 .mr(2)
7307 .nr(8)
7308 .kr(4)
7309 .sr(1)
7310 .m(2)
7311 .n(n)
7312 .k(k)
7313 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007314 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007315 }
7316 }
7317 }
7318
Frank Barcharde22685a2021-11-12 11:36:58 -08007319 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_div_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07007320 TEST_REQUIRES_ARM_NEON;
7321 for (uint32_t n = 16; n <= 24; n += 8) {
7322 for (size_t k = 1; k <= 80; k += 17) {
7323 for (uint32_t m = 1; m <= 2; m++) {
7324 GemmMicrokernelTester()
7325 .mr(2)
7326 .nr(8)
7327 .kr(4)
7328 .sr(1)
7329 .m(m)
7330 .n(n)
7331 .k(k)
7332 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007333 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007334 }
7335 }
7336 }
7337 }
7338
Frank Barcharde22685a2021-11-12 11:36:58 -08007339 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -07007340 TEST_REQUIRES_ARM_NEON;
7341 for (size_t k = 1; k <= 80; k += 17) {
7342 GemmMicrokernelTester()
7343 .mr(2)
7344 .nr(8)
7345 .kr(4)
7346 .sr(1)
7347 .m(2)
7348 .n(8)
7349 .k(k)
7350 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007351 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007352 }
7353 }
7354
Frank Barcharde22685a2021-11-12 11:36:58 -08007355 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, small_kernel_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07007356 TEST_REQUIRES_ARM_NEON;
7357 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007358 for (uint32_t n = 1; n <= 8; n++) {
7359 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07007360 GemmMicrokernelTester()
7361 .mr(2)
7362 .nr(8)
7363 .kr(4)
7364 .sr(1)
7365 .m(m)
7366 .n(n)
7367 .k(k)
7368 .ks(3)
7369 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007370 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007371 }
7372 }
7373 }
7374 }
7375
Frank Barcharde22685a2021-11-12 11:36:58 -08007376 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_gt_8_small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -07007377 TEST_REQUIRES_ARM_NEON;
7378 for (uint32_t n = 9; n < 16; n++) {
7379 for (size_t k = 1; k <= 80; k += 17) {
7380 GemmMicrokernelTester()
7381 .mr(2)
7382 .nr(8)
7383 .kr(4)
7384 .sr(1)
7385 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007386 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -07007387 .k(k)
7388 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007389 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007390 }
7391 }
7392 }
7393
Frank Barcharde22685a2021-11-12 11:36:58 -08007394 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_div_8_small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -07007395 TEST_REQUIRES_ARM_NEON;
7396 for (uint32_t n = 16; n <= 24; n += 8) {
7397 for (size_t k = 1; k <= 80; k += 17) {
7398 GemmMicrokernelTester()
7399 .mr(2)
7400 .nr(8)
7401 .kr(4)
7402 .sr(1)
7403 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007404 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -07007405 .k(k)
7406 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007407 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007408 }
7409 }
7410 }
7411
Frank Barcharde22685a2021-11-12 11:36:58 -08007412 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, strided_cm_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07007413 TEST_REQUIRES_ARM_NEON;
7414 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007415 for (uint32_t n = 1; n <= 8; n++) {
7416 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07007417 GemmMicrokernelTester()
7418 .mr(2)
7419 .nr(8)
7420 .kr(4)
7421 .sr(1)
7422 .m(m)
7423 .n(n)
7424 .k(k)
7425 .cm_stride(11)
7426 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007427 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007428 }
7429 }
7430 }
7431 }
7432
Frank Barcharde22685a2021-11-12 11:36:58 -08007433 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, a_offset) {
Frank Barchard287952a2021-11-03 15:26:45 -07007434 TEST_REQUIRES_ARM_NEON;
7435 for (size_t k = 1; k <= 80; k += 17) {
7436 GemmMicrokernelTester()
7437 .mr(2)
7438 .nr(8)
7439 .kr(4)
7440 .sr(1)
7441 .m(2)
7442 .n(8)
7443 .k(k)
7444 .ks(3)
7445 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08007446 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007447 }
7448 }
7449
Frank Barcharde22685a2021-11-12 11:36:58 -08007450 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, zero) {
Frank Barchard287952a2021-11-03 15:26:45 -07007451 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007452 for (size_t k = 1; k <= 80; k += 17) {
7453 for (uint32_t mz = 0; mz < 2; mz++) {
Frank Barchard287952a2021-11-03 15:26:45 -07007454 GemmMicrokernelTester()
7455 .mr(2)
7456 .nr(8)
7457 .kr(4)
7458 .sr(1)
7459 .m(2)
7460 .n(8)
7461 .k(k)
7462 .ks(3)
7463 .a_offset(163)
7464 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08007465 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007466 }
7467 }
7468 }
7469
Frank Barcharde22685a2021-11-12 11:36:58 -08007470 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, qmin) {
Frank Barchard287952a2021-11-03 15:26:45 -07007471 TEST_REQUIRES_ARM_NEON;
7472 GemmMicrokernelTester()
7473 .mr(2)
7474 .nr(8)
7475 .kr(4)
7476 .sr(1)
7477 .m(2)
7478 .n(8)
7479 .k(16)
7480 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007481 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007482 }
7483
Frank Barcharde22685a2021-11-12 11:36:58 -08007484 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, qmax) {
Frank Barchard287952a2021-11-03 15:26:45 -07007485 TEST_REQUIRES_ARM_NEON;
7486 GemmMicrokernelTester()
7487 .mr(2)
7488 .nr(8)
7489 .kr(4)
7490 .sr(1)
7491 .m(2)
7492 .n(8)
7493 .k(16)
7494 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007495 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007496 }
7497
Frank Barcharde22685a2021-11-12 11:36:58 -08007498 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, strided_cm) {
Frank Barchard287952a2021-11-03 15:26:45 -07007499 TEST_REQUIRES_ARM_NEON;
7500 GemmMicrokernelTester()
7501 .mr(2)
7502 .nr(8)
7503 .kr(4)
7504 .sr(1)
7505 .m(2)
7506 .n(8)
7507 .k(16)
7508 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007509 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007510 }
7511#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7512
7513
7514#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharde22685a2021-11-12 11:36:58 -08007515 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_eq_16) {
Frank Barchard287952a2021-11-03 15:26:45 -07007516 TEST_REQUIRES_ARM_NEON_V8;
7517 GemmMicrokernelTester()
7518 .mr(1)
7519 .nr(8)
7520 .kr(4)
7521 .sr(1)
7522 .m(1)
7523 .n(8)
7524 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08007525 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007526 }
7527
Frank Barcharde22685a2021-11-12 11:36:58 -08007528 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -07007529 TEST_REQUIRES_ARM_NEON_V8;
7530 GemmMicrokernelTester()
7531 .mr(1)
7532 .nr(8)
7533 .kr(4)
7534 .sr(1)
7535 .m(1)
7536 .n(8)
7537 .k(16)
7538 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007539 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007540 }
7541
Frank Barcharde22685a2021-11-12 11:36:58 -08007542 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07007543 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007544 for (uint32_t n = 1; n <= 8; n++) {
7545 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07007546 GemmMicrokernelTester()
7547 .mr(1)
7548 .nr(8)
7549 .kr(4)
7550 .sr(1)
7551 .m(m)
7552 .n(n)
7553 .k(16)
7554 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007555 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007556 }
7557 }
7558 }
7559
Frank Barcharde22685a2021-11-12 11:36:58 -08007560 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile_m) {
Frank Barchard287952a2021-11-03 15:26:45 -07007561 TEST_REQUIRES_ARM_NEON_V8;
7562 for (uint32_t m = 1; m <= 1; m++) {
7563 GemmMicrokernelTester()
7564 .mr(1)
7565 .nr(8)
7566 .kr(4)
7567 .sr(1)
7568 .m(m)
7569 .n(8)
7570 .k(16)
7571 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007572 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007573 }
7574 }
7575
Frank Barcharde22685a2021-11-12 11:36:58 -08007576 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile_n) {
Frank Barchard287952a2021-11-03 15:26:45 -07007577 TEST_REQUIRES_ARM_NEON_V8;
7578 for (uint32_t n = 1; n <= 8; n++) {
7579 GemmMicrokernelTester()
7580 .mr(1)
7581 .nr(8)
7582 .kr(4)
7583 .sr(1)
7584 .m(1)
7585 .n(n)
7586 .k(16)
7587 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007588 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007589 }
7590 }
7591
Frank Barcharde22685a2021-11-12 11:36:58 -08007592 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_lt_16) {
Frank Barchard287952a2021-11-03 15:26:45 -07007593 TEST_REQUIRES_ARM_NEON_V8;
7594 for (size_t k = 1; k < 16; k++) {
7595 GemmMicrokernelTester()
7596 .mr(1)
7597 .nr(8)
7598 .kr(4)
7599 .sr(1)
7600 .m(1)
7601 .n(8)
7602 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007603 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007604 }
7605 }
7606
Frank Barcharde22685a2021-11-12 11:36:58 -08007607 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_lt_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07007608 TEST_REQUIRES_ARM_NEON_V8;
7609 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007610 for (uint32_t n = 1; n <= 8; n++) {
7611 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07007612 GemmMicrokernelTester()
7613 .mr(1)
7614 .nr(8)
7615 .kr(4)
7616 .sr(1)
7617 .m(m)
7618 .n(n)
7619 .k(k)
7620 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007621 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007622 }
7623 }
7624 }
7625 }
7626
Frank Barcharde22685a2021-11-12 11:36:58 -08007627 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_gt_16) {
Frank Barchard287952a2021-11-03 15:26:45 -07007628 TEST_REQUIRES_ARM_NEON_V8;
7629 for (size_t k = 17; k < 32; k++) {
7630 GemmMicrokernelTester()
7631 .mr(1)
7632 .nr(8)
7633 .kr(4)
7634 .sr(1)
7635 .m(1)
7636 .n(8)
7637 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007638 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007639 }
7640 }
7641
Frank Barcharde22685a2021-11-12 11:36:58 -08007642 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_gt_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07007643 TEST_REQUIRES_ARM_NEON_V8;
7644 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007645 for (uint32_t n = 1; n <= 8; n++) {
7646 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07007647 GemmMicrokernelTester()
7648 .mr(1)
7649 .nr(8)
7650 .kr(4)
7651 .sr(1)
7652 .m(m)
7653 .n(n)
7654 .k(k)
7655 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007656 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007657 }
7658 }
7659 }
7660 }
7661
Frank Barcharde22685a2021-11-12 11:36:58 -08007662 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_div_16) {
Frank Barchard287952a2021-11-03 15:26:45 -07007663 TEST_REQUIRES_ARM_NEON_V8;
7664 for (size_t k = 32; k <= 160; k += 16) {
7665 GemmMicrokernelTester()
7666 .mr(1)
7667 .nr(8)
7668 .kr(4)
7669 .sr(1)
7670 .m(1)
7671 .n(8)
7672 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007673 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007674 }
7675 }
7676
Frank Barcharde22685a2021-11-12 11:36:58 -08007677 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_div_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07007678 TEST_REQUIRES_ARM_NEON_V8;
7679 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007680 for (uint32_t n = 1; n <= 8; n++) {
7681 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07007682 GemmMicrokernelTester()
7683 .mr(1)
7684 .nr(8)
7685 .kr(4)
7686 .sr(1)
7687 .m(m)
7688 .n(n)
7689 .k(k)
7690 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007691 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007692 }
7693 }
7694 }
7695 }
7696
Frank Barcharde22685a2021-11-12 11:36:58 -08007697 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_gt_8) {
Frank Barchard287952a2021-11-03 15:26:45 -07007698 TEST_REQUIRES_ARM_NEON_V8;
7699 for (uint32_t n = 9; n < 16; n++) {
7700 for (size_t k = 1; k <= 80; k += 17) {
7701 GemmMicrokernelTester()
7702 .mr(1)
7703 .nr(8)
7704 .kr(4)
7705 .sr(1)
7706 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007707 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -07007708 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007709 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007710 }
7711 }
7712 }
7713
Frank Barcharde22685a2021-11-12 11:36:58 -08007714 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_gt_8_strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -07007715 TEST_REQUIRES_ARM_NEON_V8;
7716 for (uint32_t n = 9; n < 16; n++) {
7717 for (size_t k = 1; k <= 80; k += 17) {
7718 GemmMicrokernelTester()
7719 .mr(1)
7720 .nr(8)
7721 .kr(4)
7722 .sr(1)
7723 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007724 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -07007725 .k(k)
7726 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007727 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007728 }
7729 }
7730 }
7731
Frank Barcharde22685a2021-11-12 11:36:58 -08007732 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_gt_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07007733 TEST_REQUIRES_ARM_NEON_V8;
7734 for (uint32_t n = 9; n < 16; n++) {
7735 for (size_t k = 1; k <= 80; k += 17) {
7736 for (uint32_t m = 1; m <= 1; m++) {
7737 GemmMicrokernelTester()
7738 .mr(1)
7739 .nr(8)
7740 .kr(4)
7741 .sr(1)
7742 .m(m)
7743 .n(n)
7744 .k(k)
7745 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007746 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007747 }
7748 }
7749 }
7750 }
7751
Frank Barcharde22685a2021-11-12 11:36:58 -08007752 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_div_8) {
Frank Barchard287952a2021-11-03 15:26:45 -07007753 TEST_REQUIRES_ARM_NEON_V8;
7754 for (uint32_t n = 16; n <= 24; n += 8) {
7755 for (size_t k = 1; k <= 80; k += 17) {
7756 GemmMicrokernelTester()
7757 .mr(1)
7758 .nr(8)
7759 .kr(4)
7760 .sr(1)
7761 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007762 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -07007763 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007764 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007765 }
7766 }
7767 }
7768
Frank Barcharde22685a2021-11-12 11:36:58 -08007769 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_div_8_strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -07007770 TEST_REQUIRES_ARM_NEON_V8;
7771 for (uint32_t n = 16; n <= 24; n += 8) {
7772 for (size_t k = 1; k <= 80; k += 17) {
7773 GemmMicrokernelTester()
7774 .mr(1)
7775 .nr(8)
7776 .kr(4)
7777 .sr(1)
7778 .m(1)
7779 .n(n)
7780 .k(k)
7781 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007782 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007783 }
7784 }
7785 }
7786
Frank Barcharde22685a2021-11-12 11:36:58 -08007787 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_div_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07007788 TEST_REQUIRES_ARM_NEON_V8;
7789 for (uint32_t n = 16; n <= 24; n += 8) {
7790 for (size_t k = 1; k <= 80; k += 17) {
7791 for (uint32_t m = 1; m <= 1; m++) {
7792 GemmMicrokernelTester()
7793 .mr(1)
7794 .nr(8)
7795 .kr(4)
7796 .sr(1)
7797 .m(m)
7798 .n(n)
7799 .k(k)
7800 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007801 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007802 }
7803 }
7804 }
7805 }
7806
Frank Barcharde22685a2021-11-12 11:36:58 -08007807 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -07007808 TEST_REQUIRES_ARM_NEON_V8;
7809 for (size_t k = 1; k <= 80; k += 17) {
7810 GemmMicrokernelTester()
7811 .mr(1)
7812 .nr(8)
7813 .kr(4)
7814 .sr(1)
7815 .m(1)
7816 .n(8)
7817 .k(k)
7818 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007819 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007820 }
7821 }
7822
Frank Barcharde22685a2021-11-12 11:36:58 -08007823 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, small_kernel_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07007824 TEST_REQUIRES_ARM_NEON_V8;
7825 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007826 for (uint32_t n = 1; n <= 8; n++) {
7827 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07007828 GemmMicrokernelTester()
7829 .mr(1)
7830 .nr(8)
7831 .kr(4)
7832 .sr(1)
7833 .m(m)
7834 .n(n)
7835 .k(k)
7836 .ks(3)
7837 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007838 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007839 }
7840 }
7841 }
7842 }
7843
Frank Barcharde22685a2021-11-12 11:36:58 -08007844 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_gt_8_small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -07007845 TEST_REQUIRES_ARM_NEON_V8;
7846 for (uint32_t n = 9; n < 16; n++) {
7847 for (size_t k = 1; k <= 80; k += 17) {
7848 GemmMicrokernelTester()
7849 .mr(1)
7850 .nr(8)
7851 .kr(4)
7852 .sr(1)
7853 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007854 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -07007855 .k(k)
7856 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007857 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007858 }
7859 }
7860 }
7861
Frank Barcharde22685a2021-11-12 11:36:58 -08007862 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_div_8_small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -07007863 TEST_REQUIRES_ARM_NEON_V8;
7864 for (uint32_t n = 16; n <= 24; n += 8) {
7865 for (size_t k = 1; k <= 80; k += 17) {
7866 GemmMicrokernelTester()
7867 .mr(1)
7868 .nr(8)
7869 .kr(4)
7870 .sr(1)
7871 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007872 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -07007873 .k(k)
7874 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007875 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007876 }
7877 }
7878 }
7879
Frank Barcharde22685a2021-11-12 11:36:58 -08007880 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, strided_cm_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07007881 TEST_REQUIRES_ARM_NEON_V8;
7882 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007883 for (uint32_t n = 1; n <= 8; n++) {
7884 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07007885 GemmMicrokernelTester()
7886 .mr(1)
7887 .nr(8)
7888 .kr(4)
7889 .sr(1)
7890 .m(m)
7891 .n(n)
7892 .k(k)
7893 .cm_stride(11)
7894 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007895 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007896 }
7897 }
7898 }
7899 }
7900
Frank Barcharde22685a2021-11-12 11:36:58 -08007901 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, a_offset) {
Frank Barchard287952a2021-11-03 15:26:45 -07007902 TEST_REQUIRES_ARM_NEON_V8;
7903 for (size_t k = 1; k <= 80; k += 17) {
7904 GemmMicrokernelTester()
7905 .mr(1)
7906 .nr(8)
7907 .kr(4)
7908 .sr(1)
7909 .m(1)
7910 .n(8)
7911 .k(k)
7912 .ks(3)
7913 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08007914 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007915 }
7916 }
7917
Frank Barcharde22685a2021-11-12 11:36:58 -08007918 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, zero) {
Frank Barchard287952a2021-11-03 15:26:45 -07007919 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007920 for (size_t k = 1; k <= 80; k += 17) {
7921 for (uint32_t mz = 0; mz < 1; mz++) {
Frank Barchard287952a2021-11-03 15:26:45 -07007922 GemmMicrokernelTester()
7923 .mr(1)
7924 .nr(8)
7925 .kr(4)
7926 .sr(1)
7927 .m(1)
7928 .n(8)
7929 .k(k)
7930 .ks(3)
7931 .a_offset(83)
7932 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08007933 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007934 }
7935 }
7936 }
7937
Frank Barcharde22685a2021-11-12 11:36:58 -08007938 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, qmin) {
Frank Barchard287952a2021-11-03 15:26:45 -07007939 TEST_REQUIRES_ARM_NEON_V8;
7940 GemmMicrokernelTester()
7941 .mr(1)
7942 .nr(8)
7943 .kr(4)
7944 .sr(1)
7945 .m(1)
7946 .n(8)
7947 .k(16)
7948 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007949 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007950 }
7951
Frank Barcharde22685a2021-11-12 11:36:58 -08007952 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, qmax) {
Frank Barchard287952a2021-11-03 15:26:45 -07007953 TEST_REQUIRES_ARM_NEON_V8;
7954 GemmMicrokernelTester()
7955 .mr(1)
7956 .nr(8)
7957 .kr(4)
7958 .sr(1)
7959 .m(1)
7960 .n(8)
7961 .k(16)
7962 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007963 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007964 }
7965
Frank Barcharde22685a2021-11-12 11:36:58 -08007966 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, strided_cm) {
Frank Barchard287952a2021-11-03 15:26:45 -07007967 TEST_REQUIRES_ARM_NEON_V8;
7968 GemmMicrokernelTester()
7969 .mr(1)
7970 .nr(8)
7971 .kr(4)
7972 .sr(1)
7973 .m(1)
7974 .n(8)
7975 .k(16)
7976 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007977 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007978 }
7979#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7980
7981
7982#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharde22685a2021-11-12 11:36:58 -08007983 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_eq_16) {
Frank Barchard287952a2021-11-03 15:26:45 -07007984 TEST_REQUIRES_ARM_NEON_V8;
7985 GemmMicrokernelTester()
7986 .mr(2)
7987 .nr(8)
7988 .kr(4)
7989 .sr(1)
7990 .m(2)
7991 .n(8)
7992 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08007993 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07007994 }
7995
Frank Barcharde22685a2021-11-12 11:36:58 -08007996 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -07007997 TEST_REQUIRES_ARM_NEON_V8;
7998 GemmMicrokernelTester()
7999 .mr(2)
8000 .nr(8)
8001 .kr(4)
8002 .sr(1)
8003 .m(2)
8004 .n(8)
8005 .k(16)
8006 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008007 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07008008 }
8009
Frank Barcharde22685a2021-11-12 11:36:58 -08008010 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07008011 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008012 for (uint32_t n = 1; n <= 8; n++) {
8013 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07008014 GemmMicrokernelTester()
8015 .mr(2)
8016 .nr(8)
8017 .kr(4)
8018 .sr(1)
8019 .m(m)
8020 .n(n)
8021 .k(16)
8022 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008023 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07008024 }
8025 }
8026 }
8027
Frank Barcharde22685a2021-11-12 11:36:58 -08008028 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile_m) {
Frank Barchard287952a2021-11-03 15:26:45 -07008029 TEST_REQUIRES_ARM_NEON_V8;
8030 for (uint32_t m = 1; m <= 2; m++) {
8031 GemmMicrokernelTester()
8032 .mr(2)
8033 .nr(8)
8034 .kr(4)
8035 .sr(1)
8036 .m(m)
8037 .n(8)
8038 .k(16)
8039 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008040 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07008041 }
8042 }
8043
Frank Barcharde22685a2021-11-12 11:36:58 -08008044 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile_n) {
Frank Barchard287952a2021-11-03 15:26:45 -07008045 TEST_REQUIRES_ARM_NEON_V8;
8046 for (uint32_t n = 1; n <= 8; n++) {
8047 GemmMicrokernelTester()
8048 .mr(2)
8049 .nr(8)
8050 .kr(4)
8051 .sr(1)
8052 .m(2)
8053 .n(n)
8054 .k(16)
8055 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008056 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07008057 }
8058 }
8059
Frank Barcharde22685a2021-11-12 11:36:58 -08008060 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_lt_16) {
Frank Barchard287952a2021-11-03 15:26:45 -07008061 TEST_REQUIRES_ARM_NEON_V8;
8062 for (size_t k = 1; k < 16; k++) {
8063 GemmMicrokernelTester()
8064 .mr(2)
8065 .nr(8)
8066 .kr(4)
8067 .sr(1)
8068 .m(2)
8069 .n(8)
8070 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008071 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07008072 }
8073 }
8074
Frank Barcharde22685a2021-11-12 11:36:58 -08008075 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_lt_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07008076 TEST_REQUIRES_ARM_NEON_V8;
8077 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008078 for (uint32_t n = 1; n <= 8; n++) {
8079 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07008080 GemmMicrokernelTester()
8081 .mr(2)
8082 .nr(8)
8083 .kr(4)
8084 .sr(1)
8085 .m(m)
8086 .n(n)
8087 .k(k)
8088 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008089 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07008090 }
8091 }
8092 }
8093 }
8094
Frank Barcharde22685a2021-11-12 11:36:58 -08008095 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_gt_16) {
Frank Barchard287952a2021-11-03 15:26:45 -07008096 TEST_REQUIRES_ARM_NEON_V8;
8097 for (size_t k = 17; k < 32; k++) {
8098 GemmMicrokernelTester()
8099 .mr(2)
8100 .nr(8)
8101 .kr(4)
8102 .sr(1)
8103 .m(2)
8104 .n(8)
8105 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008106 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07008107 }
8108 }
8109
Frank Barcharde22685a2021-11-12 11:36:58 -08008110 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_gt_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07008111 TEST_REQUIRES_ARM_NEON_V8;
8112 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008113 for (uint32_t n = 1; n <= 8; n++) {
8114 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07008115 GemmMicrokernelTester()
8116 .mr(2)
8117 .nr(8)
8118 .kr(4)
8119 .sr(1)
8120 .m(m)
8121 .n(n)
8122 .k(k)
8123 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008124 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07008125 }
8126 }
8127 }
8128 }
8129
Frank Barcharde22685a2021-11-12 11:36:58 -08008130 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_div_16) {
Frank Barchard287952a2021-11-03 15:26:45 -07008131 TEST_REQUIRES_ARM_NEON_V8;
8132 for (size_t k = 32; k <= 160; k += 16) {
8133 GemmMicrokernelTester()
8134 .mr(2)
8135 .nr(8)
8136 .kr(4)
8137 .sr(1)
8138 .m(2)
8139 .n(8)
8140 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008141 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07008142 }
8143 }
8144
Frank Barcharde22685a2021-11-12 11:36:58 -08008145 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_div_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07008146 TEST_REQUIRES_ARM_NEON_V8;
8147 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008148 for (uint32_t n = 1; n <= 8; n++) {
8149 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07008150 GemmMicrokernelTester()
8151 .mr(2)
8152 .nr(8)
8153 .kr(4)
8154 .sr(1)
8155 .m(m)
8156 .n(n)
8157 .k(k)
8158 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008159 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07008160 }
8161 }
8162 }
8163 }
8164
Frank Barcharde22685a2021-11-12 11:36:58 -08008165 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_gt_8) {
Frank Barchard287952a2021-11-03 15:26:45 -07008166 TEST_REQUIRES_ARM_NEON_V8;
8167 for (uint32_t n = 9; n < 16; n++) {
8168 for (size_t k = 1; k <= 80; k += 17) {
8169 GemmMicrokernelTester()
8170 .mr(2)
8171 .nr(8)
8172 .kr(4)
8173 .sr(1)
8174 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008175 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -07008176 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008177 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07008178 }
8179 }
8180 }
8181
Frank Barcharde22685a2021-11-12 11:36:58 -08008182 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_gt_8_strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -07008183 TEST_REQUIRES_ARM_NEON_V8;
8184 for (uint32_t n = 9; n < 16; n++) {
8185 for (size_t k = 1; k <= 80; k += 17) {
8186 GemmMicrokernelTester()
8187 .mr(2)
8188 .nr(8)
8189 .kr(4)
8190 .sr(1)
8191 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008192 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -07008193 .k(k)
8194 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008195 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07008196 }
8197 }
8198 }
8199
Frank Barcharde22685a2021-11-12 11:36:58 -08008200 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_gt_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07008201 TEST_REQUIRES_ARM_NEON_V8;
8202 for (uint32_t n = 9; n < 16; n++) {
8203 for (size_t k = 1; k <= 80; k += 17) {
8204 for (uint32_t m = 1; m <= 2; m++) {
8205 GemmMicrokernelTester()
8206 .mr(2)
8207 .nr(8)
8208 .kr(4)
8209 .sr(1)
8210 .m(m)
8211 .n(n)
8212 .k(k)
8213 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008214 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07008215 }
8216 }
8217 }
8218 }
8219
Frank Barcharde22685a2021-11-12 11:36:58 -08008220 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_div_8) {
Frank Barchard287952a2021-11-03 15:26:45 -07008221 TEST_REQUIRES_ARM_NEON_V8;
8222 for (uint32_t n = 16; n <= 24; n += 8) {
8223 for (size_t k = 1; k <= 80; k += 17) {
8224 GemmMicrokernelTester()
8225 .mr(2)
8226 .nr(8)
8227 .kr(4)
8228 .sr(1)
8229 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008230 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -07008231 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008232 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07008233 }
8234 }
8235 }
8236
Frank Barcharde22685a2021-11-12 11:36:58 -08008237 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_div_8_strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -07008238 TEST_REQUIRES_ARM_NEON_V8;
8239 for (uint32_t n = 16; n <= 24; n += 8) {
8240 for (size_t k = 1; k <= 80; k += 17) {
8241 GemmMicrokernelTester()
8242 .mr(2)
8243 .nr(8)
8244 .kr(4)
8245 .sr(1)
8246 .m(2)
8247 .n(n)
8248 .k(k)
8249 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008250 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07008251 }
8252 }
8253 }
8254
Frank Barcharde22685a2021-11-12 11:36:58 -08008255 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_div_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07008256 TEST_REQUIRES_ARM_NEON_V8;
8257 for (uint32_t n = 16; n <= 24; n += 8) {
8258 for (size_t k = 1; k <= 80; k += 17) {
8259 for (uint32_t m = 1; m <= 2; m++) {
8260 GemmMicrokernelTester()
8261 .mr(2)
8262 .nr(8)
8263 .kr(4)
8264 .sr(1)
8265 .m(m)
8266 .n(n)
8267 .k(k)
8268 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008269 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07008270 }
8271 }
8272 }
8273 }
8274
Frank Barcharde22685a2021-11-12 11:36:58 -08008275 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -07008276 TEST_REQUIRES_ARM_NEON_V8;
8277 for (size_t k = 1; k <= 80; k += 17) {
8278 GemmMicrokernelTester()
8279 .mr(2)
8280 .nr(8)
8281 .kr(4)
8282 .sr(1)
8283 .m(2)
8284 .n(8)
8285 .k(k)
8286 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008287 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07008288 }
8289 }
8290
Frank Barcharde22685a2021-11-12 11:36:58 -08008291 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, small_kernel_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07008292 TEST_REQUIRES_ARM_NEON_V8;
8293 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008294 for (uint32_t n = 1; n <= 8; n++) {
8295 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07008296 GemmMicrokernelTester()
8297 .mr(2)
8298 .nr(8)
8299 .kr(4)
8300 .sr(1)
8301 .m(m)
8302 .n(n)
8303 .k(k)
8304 .ks(3)
8305 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008306 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07008307 }
8308 }
8309 }
8310 }
8311
Frank Barcharde22685a2021-11-12 11:36:58 -08008312 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_gt_8_small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -07008313 TEST_REQUIRES_ARM_NEON_V8;
8314 for (uint32_t n = 9; n < 16; n++) {
8315 for (size_t k = 1; k <= 80; k += 17) {
8316 GemmMicrokernelTester()
8317 .mr(2)
8318 .nr(8)
8319 .kr(4)
8320 .sr(1)
8321 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008322 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -07008323 .k(k)
8324 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008325 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07008326 }
8327 }
8328 }
8329
Frank Barcharde22685a2021-11-12 11:36:58 -08008330 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_div_8_small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -07008331 TEST_REQUIRES_ARM_NEON_V8;
8332 for (uint32_t n = 16; n <= 24; n += 8) {
8333 for (size_t k = 1; k <= 80; k += 17) {
8334 GemmMicrokernelTester()
8335 .mr(2)
8336 .nr(8)
8337 .kr(4)
8338 .sr(1)
8339 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008340 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -07008341 .k(k)
8342 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008343 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07008344 }
8345 }
8346 }
8347
Frank Barcharde22685a2021-11-12 11:36:58 -08008348 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, strided_cm_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07008349 TEST_REQUIRES_ARM_NEON_V8;
8350 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008351 for (uint32_t n = 1; n <= 8; n++) {
8352 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07008353 GemmMicrokernelTester()
8354 .mr(2)
8355 .nr(8)
8356 .kr(4)
8357 .sr(1)
8358 .m(m)
8359 .n(n)
8360 .k(k)
8361 .cm_stride(11)
8362 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008363 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07008364 }
8365 }
8366 }
8367 }
8368
Frank Barcharde22685a2021-11-12 11:36:58 -08008369 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, a_offset) {
Frank Barchard287952a2021-11-03 15:26:45 -07008370 TEST_REQUIRES_ARM_NEON_V8;
8371 for (size_t k = 1; k <= 80; k += 17) {
8372 GemmMicrokernelTester()
8373 .mr(2)
8374 .nr(8)
8375 .kr(4)
8376 .sr(1)
8377 .m(2)
8378 .n(8)
8379 .k(k)
8380 .ks(3)
8381 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08008382 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07008383 }
8384 }
8385
Frank Barcharde22685a2021-11-12 11:36:58 -08008386 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, zero) {
Frank Barchard287952a2021-11-03 15:26:45 -07008387 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008388 for (size_t k = 1; k <= 80; k += 17) {
8389 for (uint32_t mz = 0; mz < 2; mz++) {
Frank Barchard287952a2021-11-03 15:26:45 -07008390 GemmMicrokernelTester()
8391 .mr(2)
8392 .nr(8)
8393 .kr(4)
8394 .sr(1)
8395 .m(2)
8396 .n(8)
8397 .k(k)
8398 .ks(3)
8399 .a_offset(163)
8400 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08008401 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07008402 }
8403 }
8404 }
8405
Frank Barcharde22685a2021-11-12 11:36:58 -08008406 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, qmin) {
Frank Barchard287952a2021-11-03 15:26:45 -07008407 TEST_REQUIRES_ARM_NEON_V8;
8408 GemmMicrokernelTester()
8409 .mr(2)
8410 .nr(8)
8411 .kr(4)
8412 .sr(1)
8413 .m(2)
8414 .n(8)
8415 .k(16)
8416 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008417 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07008418 }
8419
Frank Barcharde22685a2021-11-12 11:36:58 -08008420 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, qmax) {
Frank Barchard287952a2021-11-03 15:26:45 -07008421 TEST_REQUIRES_ARM_NEON_V8;
8422 GemmMicrokernelTester()
8423 .mr(2)
8424 .nr(8)
8425 .kr(4)
8426 .sr(1)
8427 .m(2)
8428 .n(8)
8429 .k(16)
8430 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008431 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07008432 }
8433
Frank Barcharde22685a2021-11-12 11:36:58 -08008434 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, strided_cm) {
Frank Barchard287952a2021-11-03 15:26:45 -07008435 TEST_REQUIRES_ARM_NEON_V8;
8436 GemmMicrokernelTester()
8437 .mr(2)
8438 .nr(8)
8439 .kr(4)
8440 .sr(1)
8441 .m(2)
8442 .n(8)
8443 .k(16)
8444 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008445 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07008446 }
8447#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8448
8449
8450#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard64ab1b72021-11-22 10:57:40 -08008451 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_eq_16) {
8452 TEST_REQUIRES_ARM_NEON;
8453 GemmMicrokernelTester()
8454 .mr(2)
8455 .nr(8)
8456 .kr(4)
8457 .sr(1)
8458 .m(2)
8459 .n(8)
8460 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08008461 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008462 }
8463
8464 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, strided_cn) {
8465 TEST_REQUIRES_ARM_NEON;
8466 GemmMicrokernelTester()
8467 .mr(2)
8468 .nr(8)
8469 .kr(4)
8470 .sr(1)
8471 .m(2)
8472 .n(8)
8473 .k(16)
8474 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008475 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008476 }
8477
8478 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile) {
8479 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008480 for (uint32_t n = 1; n <= 8; n++) {
8481 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08008482 GemmMicrokernelTester()
8483 .mr(2)
8484 .nr(8)
8485 .kr(4)
8486 .sr(1)
8487 .m(m)
8488 .n(n)
8489 .k(16)
8490 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008491 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008492 }
8493 }
8494 }
8495
8496 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
8497 TEST_REQUIRES_ARM_NEON;
8498 for (uint32_t m = 1; m <= 2; m++) {
8499 GemmMicrokernelTester()
8500 .mr(2)
8501 .nr(8)
8502 .kr(4)
8503 .sr(1)
8504 .m(m)
8505 .n(8)
8506 .k(16)
8507 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008508 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008509 }
8510 }
8511
8512 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
8513 TEST_REQUIRES_ARM_NEON;
8514 for (uint32_t n = 1; n <= 8; n++) {
8515 GemmMicrokernelTester()
8516 .mr(2)
8517 .nr(8)
8518 .kr(4)
8519 .sr(1)
8520 .m(2)
8521 .n(n)
8522 .k(16)
8523 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008524 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008525 }
8526 }
8527
8528 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_lt_16) {
8529 TEST_REQUIRES_ARM_NEON;
8530 for (size_t k = 1; k < 16; k++) {
8531 GemmMicrokernelTester()
8532 .mr(2)
8533 .nr(8)
8534 .kr(4)
8535 .sr(1)
8536 .m(2)
8537 .n(8)
8538 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008539 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008540 }
8541 }
8542
8543 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_lt_16_subtile) {
8544 TEST_REQUIRES_ARM_NEON;
8545 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008546 for (uint32_t n = 1; n <= 8; n++) {
8547 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08008548 GemmMicrokernelTester()
8549 .mr(2)
8550 .nr(8)
8551 .kr(4)
8552 .sr(1)
8553 .m(m)
8554 .n(n)
8555 .k(k)
8556 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008557 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008558 }
8559 }
8560 }
8561 }
8562
8563 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_gt_16) {
8564 TEST_REQUIRES_ARM_NEON;
8565 for (size_t k = 17; k < 32; k++) {
8566 GemmMicrokernelTester()
8567 .mr(2)
8568 .nr(8)
8569 .kr(4)
8570 .sr(1)
8571 .m(2)
8572 .n(8)
8573 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008574 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008575 }
8576 }
8577
8578 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_gt_16_subtile) {
8579 TEST_REQUIRES_ARM_NEON;
8580 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008581 for (uint32_t n = 1; n <= 8; n++) {
8582 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08008583 GemmMicrokernelTester()
8584 .mr(2)
8585 .nr(8)
8586 .kr(4)
8587 .sr(1)
8588 .m(m)
8589 .n(n)
8590 .k(k)
8591 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008592 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008593 }
8594 }
8595 }
8596 }
8597
8598 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_div_16) {
8599 TEST_REQUIRES_ARM_NEON;
8600 for (size_t k = 32; k <= 160; k += 16) {
8601 GemmMicrokernelTester()
8602 .mr(2)
8603 .nr(8)
8604 .kr(4)
8605 .sr(1)
8606 .m(2)
8607 .n(8)
8608 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008609 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008610 }
8611 }
8612
8613 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_div_16_subtile) {
8614 TEST_REQUIRES_ARM_NEON;
8615 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008616 for (uint32_t n = 1; n <= 8; n++) {
8617 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08008618 GemmMicrokernelTester()
8619 .mr(2)
8620 .nr(8)
8621 .kr(4)
8622 .sr(1)
8623 .m(m)
8624 .n(n)
8625 .k(k)
8626 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008627 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008628 }
8629 }
8630 }
8631 }
8632
8633 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_gt_8) {
8634 TEST_REQUIRES_ARM_NEON;
8635 for (uint32_t n = 9; n < 16; n++) {
8636 for (size_t k = 1; k <= 80; k += 17) {
8637 GemmMicrokernelTester()
8638 .mr(2)
8639 .nr(8)
8640 .kr(4)
8641 .sr(1)
8642 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008643 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -08008644 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008645 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008646 }
8647 }
8648 }
8649
8650 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
8651 TEST_REQUIRES_ARM_NEON;
8652 for (uint32_t n = 9; n < 16; n++) {
8653 for (size_t k = 1; k <= 80; k += 17) {
8654 GemmMicrokernelTester()
8655 .mr(2)
8656 .nr(8)
8657 .kr(4)
8658 .sr(1)
8659 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008660 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -08008661 .k(k)
8662 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008663 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008664 }
8665 }
8666 }
8667
8668 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_gt_8_subtile) {
8669 TEST_REQUIRES_ARM_NEON;
8670 for (uint32_t n = 9; n < 16; n++) {
8671 for (size_t k = 1; k <= 80; k += 17) {
8672 for (uint32_t m = 1; m <= 2; m++) {
8673 GemmMicrokernelTester()
8674 .mr(2)
8675 .nr(8)
8676 .kr(4)
8677 .sr(1)
8678 .m(m)
8679 .n(n)
8680 .k(k)
8681 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008682 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008683 }
8684 }
8685 }
8686 }
8687
8688 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_div_8) {
8689 TEST_REQUIRES_ARM_NEON;
8690 for (uint32_t n = 16; n <= 24; n += 8) {
8691 for (size_t k = 1; k <= 80; k += 17) {
8692 GemmMicrokernelTester()
8693 .mr(2)
8694 .nr(8)
8695 .kr(4)
8696 .sr(1)
8697 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008698 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -08008699 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008700 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008701 }
8702 }
8703 }
8704
8705 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_div_8_strided_cn) {
8706 TEST_REQUIRES_ARM_NEON;
8707 for (uint32_t n = 16; n <= 24; n += 8) {
8708 for (size_t k = 1; k <= 80; k += 17) {
8709 GemmMicrokernelTester()
8710 .mr(2)
8711 .nr(8)
8712 .kr(4)
8713 .sr(1)
8714 .m(2)
8715 .n(n)
8716 .k(k)
8717 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008718 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008719 }
8720 }
8721 }
8722
8723 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_div_8_subtile) {
8724 TEST_REQUIRES_ARM_NEON;
8725 for (uint32_t n = 16; n <= 24; n += 8) {
8726 for (size_t k = 1; k <= 80; k += 17) {
8727 for (uint32_t m = 1; m <= 2; m++) {
8728 GemmMicrokernelTester()
8729 .mr(2)
8730 .nr(8)
8731 .kr(4)
8732 .sr(1)
8733 .m(m)
8734 .n(n)
8735 .k(k)
8736 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008737 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008738 }
8739 }
8740 }
8741 }
8742
8743 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, small_kernel) {
8744 TEST_REQUIRES_ARM_NEON;
8745 for (size_t k = 1; k <= 80; k += 17) {
8746 GemmMicrokernelTester()
8747 .mr(2)
8748 .nr(8)
8749 .kr(4)
8750 .sr(1)
8751 .m(2)
8752 .n(8)
8753 .k(k)
8754 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008755 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008756 }
8757 }
8758
8759 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, small_kernel_subtile) {
8760 TEST_REQUIRES_ARM_NEON;
8761 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008762 for (uint32_t n = 1; n <= 8; n++) {
8763 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08008764 GemmMicrokernelTester()
8765 .mr(2)
8766 .nr(8)
8767 .kr(4)
8768 .sr(1)
8769 .m(m)
8770 .n(n)
8771 .k(k)
8772 .ks(3)
8773 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008774 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008775 }
8776 }
8777 }
8778 }
8779
8780 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
8781 TEST_REQUIRES_ARM_NEON;
8782 for (uint32_t n = 9; n < 16; n++) {
8783 for (size_t k = 1; k <= 80; k += 17) {
8784 GemmMicrokernelTester()
8785 .mr(2)
8786 .nr(8)
8787 .kr(4)
8788 .sr(1)
8789 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008790 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -08008791 .k(k)
8792 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008793 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008794 }
8795 }
8796 }
8797
8798 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_div_8_small_kernel) {
8799 TEST_REQUIRES_ARM_NEON;
8800 for (uint32_t n = 16; n <= 24; n += 8) {
8801 for (size_t k = 1; k <= 80; k += 17) {
8802 GemmMicrokernelTester()
8803 .mr(2)
8804 .nr(8)
8805 .kr(4)
8806 .sr(1)
8807 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008808 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -08008809 .k(k)
8810 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008811 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008812 }
8813 }
8814 }
8815
8816 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, strided_cm_subtile) {
8817 TEST_REQUIRES_ARM_NEON;
8818 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008819 for (uint32_t n = 1; n <= 8; n++) {
8820 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08008821 GemmMicrokernelTester()
8822 .mr(2)
8823 .nr(8)
8824 .kr(4)
8825 .sr(1)
8826 .m(m)
8827 .n(n)
8828 .k(k)
8829 .cm_stride(11)
8830 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008831 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008832 }
8833 }
8834 }
8835 }
8836
8837 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, a_offset) {
8838 TEST_REQUIRES_ARM_NEON;
8839 for (size_t k = 1; k <= 80; k += 17) {
8840 GemmMicrokernelTester()
8841 .mr(2)
8842 .nr(8)
8843 .kr(4)
8844 .sr(1)
8845 .m(2)
8846 .n(8)
8847 .k(k)
8848 .ks(3)
8849 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08008850 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008851 }
8852 }
8853
8854 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, zero) {
8855 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008856 for (size_t k = 1; k <= 80; k += 17) {
8857 for (uint32_t mz = 0; mz < 2; mz++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08008858 GemmMicrokernelTester()
8859 .mr(2)
8860 .nr(8)
8861 .kr(4)
8862 .sr(1)
8863 .m(2)
8864 .n(8)
8865 .k(k)
8866 .ks(3)
8867 .a_offset(163)
8868 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08008869 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008870 }
8871 }
8872 }
8873
8874 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, qmin) {
8875 TEST_REQUIRES_ARM_NEON;
8876 GemmMicrokernelTester()
8877 .mr(2)
8878 .nr(8)
8879 .kr(4)
8880 .sr(1)
8881 .m(2)
8882 .n(8)
8883 .k(16)
8884 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008885 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008886 }
8887
8888 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, qmax) {
8889 TEST_REQUIRES_ARM_NEON;
8890 GemmMicrokernelTester()
8891 .mr(2)
8892 .nr(8)
8893 .kr(4)
8894 .sr(1)
8895 .m(2)
8896 .n(8)
8897 .k(16)
8898 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008899 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008900 }
8901
8902 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, strided_cm) {
8903 TEST_REQUIRES_ARM_NEON;
8904 GemmMicrokernelTester()
8905 .mr(2)
8906 .nr(8)
8907 .kr(4)
8908 .sr(1)
8909 .m(2)
8910 .n(8)
8911 .k(16)
8912 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008913 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008914 }
8915#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8916
8917
8918#if XNN_ARCH_ARM || XNN_ARCH_ARM64
8919 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_eq_16) {
8920 TEST_REQUIRES_ARM_NEON_V8;
8921 GemmMicrokernelTester()
8922 .mr(1)
8923 .nr(8)
8924 .kr(4)
8925 .sr(1)
8926 .m(1)
8927 .n(8)
8928 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08008929 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008930 }
8931
8932 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, strided_cn) {
8933 TEST_REQUIRES_ARM_NEON_V8;
8934 GemmMicrokernelTester()
8935 .mr(1)
8936 .nr(8)
8937 .kr(4)
8938 .sr(1)
8939 .m(1)
8940 .n(8)
8941 .k(16)
8942 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008943 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008944 }
8945
8946 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_eq_16_subtile) {
8947 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008948 for (uint32_t n = 1; n <= 8; n++) {
8949 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08008950 GemmMicrokernelTester()
8951 .mr(1)
8952 .nr(8)
8953 .kr(4)
8954 .sr(1)
8955 .m(m)
8956 .n(n)
8957 .k(16)
8958 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008959 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008960 }
8961 }
8962 }
8963
8964 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_eq_16_subtile_m) {
8965 TEST_REQUIRES_ARM_NEON_V8;
8966 for (uint32_t m = 1; m <= 1; m++) {
8967 GemmMicrokernelTester()
8968 .mr(1)
8969 .nr(8)
8970 .kr(4)
8971 .sr(1)
8972 .m(m)
8973 .n(8)
8974 .k(16)
8975 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008976 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008977 }
8978 }
8979
8980 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_eq_16_subtile_n) {
8981 TEST_REQUIRES_ARM_NEON_V8;
8982 for (uint32_t n = 1; n <= 8; n++) {
8983 GemmMicrokernelTester()
8984 .mr(1)
8985 .nr(8)
8986 .kr(4)
8987 .sr(1)
8988 .m(1)
8989 .n(n)
8990 .k(16)
8991 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008992 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08008993 }
8994 }
8995
8996 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_lt_16) {
8997 TEST_REQUIRES_ARM_NEON_V8;
8998 for (size_t k = 1; k < 16; k++) {
8999 GemmMicrokernelTester()
9000 .mr(1)
9001 .nr(8)
9002 .kr(4)
9003 .sr(1)
9004 .m(1)
9005 .n(8)
9006 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009007 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009008 }
9009 }
9010
9011 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_lt_16_subtile) {
9012 TEST_REQUIRES_ARM_NEON_V8;
9013 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009014 for (uint32_t n = 1; n <= 8; n++) {
9015 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08009016 GemmMicrokernelTester()
9017 .mr(1)
9018 .nr(8)
9019 .kr(4)
9020 .sr(1)
9021 .m(m)
9022 .n(n)
9023 .k(k)
9024 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009025 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009026 }
9027 }
9028 }
9029 }
9030
9031 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_gt_16) {
9032 TEST_REQUIRES_ARM_NEON_V8;
9033 for (size_t k = 17; k < 32; k++) {
9034 GemmMicrokernelTester()
9035 .mr(1)
9036 .nr(8)
9037 .kr(4)
9038 .sr(1)
9039 .m(1)
9040 .n(8)
9041 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009042 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009043 }
9044 }
9045
9046 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_gt_16_subtile) {
9047 TEST_REQUIRES_ARM_NEON_V8;
9048 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009049 for (uint32_t n = 1; n <= 8; n++) {
9050 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08009051 GemmMicrokernelTester()
9052 .mr(1)
9053 .nr(8)
9054 .kr(4)
9055 .sr(1)
9056 .m(m)
9057 .n(n)
9058 .k(k)
9059 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009060 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009061 }
9062 }
9063 }
9064 }
9065
9066 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_div_16) {
9067 TEST_REQUIRES_ARM_NEON_V8;
9068 for (size_t k = 32; k <= 160; k += 16) {
9069 GemmMicrokernelTester()
9070 .mr(1)
9071 .nr(8)
9072 .kr(4)
9073 .sr(1)
9074 .m(1)
9075 .n(8)
9076 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009077 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009078 }
9079 }
9080
9081 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_div_16_subtile) {
9082 TEST_REQUIRES_ARM_NEON_V8;
9083 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009084 for (uint32_t n = 1; n <= 8; n++) {
9085 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08009086 GemmMicrokernelTester()
9087 .mr(1)
9088 .nr(8)
9089 .kr(4)
9090 .sr(1)
9091 .m(m)
9092 .n(n)
9093 .k(k)
9094 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009095 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009096 }
9097 }
9098 }
9099 }
9100
9101 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_gt_8) {
9102 TEST_REQUIRES_ARM_NEON_V8;
9103 for (uint32_t n = 9; n < 16; n++) {
9104 for (size_t k = 1; k <= 80; k += 17) {
9105 GemmMicrokernelTester()
9106 .mr(1)
9107 .nr(8)
9108 .kr(4)
9109 .sr(1)
9110 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009111 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -08009112 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009113 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009114 }
9115 }
9116 }
9117
9118 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_gt_8_strided_cn) {
9119 TEST_REQUIRES_ARM_NEON_V8;
9120 for (uint32_t n = 9; n < 16; n++) {
9121 for (size_t k = 1; k <= 80; k += 17) {
9122 GemmMicrokernelTester()
9123 .mr(1)
9124 .nr(8)
9125 .kr(4)
9126 .sr(1)
9127 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009128 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -08009129 .k(k)
9130 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009131 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009132 }
9133 }
9134 }
9135
9136 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_gt_8_subtile) {
9137 TEST_REQUIRES_ARM_NEON_V8;
9138 for (uint32_t n = 9; n < 16; n++) {
9139 for (size_t k = 1; k <= 80; k += 17) {
9140 for (uint32_t m = 1; m <= 1; m++) {
9141 GemmMicrokernelTester()
9142 .mr(1)
9143 .nr(8)
9144 .kr(4)
9145 .sr(1)
9146 .m(m)
9147 .n(n)
9148 .k(k)
9149 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009150 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009151 }
9152 }
9153 }
9154 }
9155
9156 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_div_8) {
9157 TEST_REQUIRES_ARM_NEON_V8;
9158 for (uint32_t n = 16; n <= 24; n += 8) {
9159 for (size_t k = 1; k <= 80; k += 17) {
9160 GemmMicrokernelTester()
9161 .mr(1)
9162 .nr(8)
9163 .kr(4)
9164 .sr(1)
9165 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009166 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -08009167 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009168 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009169 }
9170 }
9171 }
9172
9173 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_div_8_strided_cn) {
9174 TEST_REQUIRES_ARM_NEON_V8;
9175 for (uint32_t n = 16; n <= 24; n += 8) {
9176 for (size_t k = 1; k <= 80; k += 17) {
9177 GemmMicrokernelTester()
9178 .mr(1)
9179 .nr(8)
9180 .kr(4)
9181 .sr(1)
9182 .m(1)
9183 .n(n)
9184 .k(k)
9185 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009186 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009187 }
9188 }
9189 }
9190
9191 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_div_8_subtile) {
9192 TEST_REQUIRES_ARM_NEON_V8;
9193 for (uint32_t n = 16; n <= 24; n += 8) {
9194 for (size_t k = 1; k <= 80; k += 17) {
9195 for (uint32_t m = 1; m <= 1; m++) {
9196 GemmMicrokernelTester()
9197 .mr(1)
9198 .nr(8)
9199 .kr(4)
9200 .sr(1)
9201 .m(m)
9202 .n(n)
9203 .k(k)
9204 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009205 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009206 }
9207 }
9208 }
9209 }
9210
9211 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, small_kernel) {
9212 TEST_REQUIRES_ARM_NEON_V8;
9213 for (size_t k = 1; k <= 80; k += 17) {
9214 GemmMicrokernelTester()
9215 .mr(1)
9216 .nr(8)
9217 .kr(4)
9218 .sr(1)
9219 .m(1)
9220 .n(8)
9221 .k(k)
9222 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009223 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009224 }
9225 }
9226
9227 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, small_kernel_subtile) {
9228 TEST_REQUIRES_ARM_NEON_V8;
9229 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009230 for (uint32_t n = 1; n <= 8; n++) {
9231 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08009232 GemmMicrokernelTester()
9233 .mr(1)
9234 .nr(8)
9235 .kr(4)
9236 .sr(1)
9237 .m(m)
9238 .n(n)
9239 .k(k)
9240 .ks(3)
9241 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009242 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009243 }
9244 }
9245 }
9246 }
9247
9248 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_gt_8_small_kernel) {
9249 TEST_REQUIRES_ARM_NEON_V8;
9250 for (uint32_t n = 9; n < 16; n++) {
9251 for (size_t k = 1; k <= 80; k += 17) {
9252 GemmMicrokernelTester()
9253 .mr(1)
9254 .nr(8)
9255 .kr(4)
9256 .sr(1)
9257 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009258 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -08009259 .k(k)
9260 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009261 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009262 }
9263 }
9264 }
9265
9266 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_div_8_small_kernel) {
9267 TEST_REQUIRES_ARM_NEON_V8;
9268 for (uint32_t n = 16; n <= 24; n += 8) {
9269 for (size_t k = 1; k <= 80; k += 17) {
9270 GemmMicrokernelTester()
9271 .mr(1)
9272 .nr(8)
9273 .kr(4)
9274 .sr(1)
9275 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009276 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -08009277 .k(k)
9278 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009279 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009280 }
9281 }
9282 }
9283
9284 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, strided_cm_subtile) {
9285 TEST_REQUIRES_ARM_NEON_V8;
9286 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009287 for (uint32_t n = 1; n <= 8; n++) {
9288 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08009289 GemmMicrokernelTester()
9290 .mr(1)
9291 .nr(8)
9292 .kr(4)
9293 .sr(1)
9294 .m(m)
9295 .n(n)
9296 .k(k)
9297 .cm_stride(11)
9298 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009299 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009300 }
9301 }
9302 }
9303 }
9304
9305 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, a_offset) {
9306 TEST_REQUIRES_ARM_NEON_V8;
9307 for (size_t k = 1; k <= 80; k += 17) {
9308 GemmMicrokernelTester()
9309 .mr(1)
9310 .nr(8)
9311 .kr(4)
9312 .sr(1)
9313 .m(1)
9314 .n(8)
9315 .k(k)
9316 .ks(3)
9317 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08009318 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009319 }
9320 }
9321
9322 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, zero) {
9323 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009324 for (size_t k = 1; k <= 80; k += 17) {
9325 for (uint32_t mz = 0; mz < 1; mz++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08009326 GemmMicrokernelTester()
9327 .mr(1)
9328 .nr(8)
9329 .kr(4)
9330 .sr(1)
9331 .m(1)
9332 .n(8)
9333 .k(k)
9334 .ks(3)
9335 .a_offset(83)
9336 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08009337 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009338 }
9339 }
9340 }
9341
9342 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, qmin) {
9343 TEST_REQUIRES_ARM_NEON_V8;
9344 GemmMicrokernelTester()
9345 .mr(1)
9346 .nr(8)
9347 .kr(4)
9348 .sr(1)
9349 .m(1)
9350 .n(8)
9351 .k(16)
9352 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009353 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009354 }
9355
9356 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, qmax) {
9357 TEST_REQUIRES_ARM_NEON_V8;
9358 GemmMicrokernelTester()
9359 .mr(1)
9360 .nr(8)
9361 .kr(4)
9362 .sr(1)
9363 .m(1)
9364 .n(8)
9365 .k(16)
9366 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009367 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009368 }
9369
9370 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, strided_cm) {
9371 TEST_REQUIRES_ARM_NEON_V8;
9372 GemmMicrokernelTester()
9373 .mr(1)
9374 .nr(8)
9375 .kr(4)
9376 .sr(1)
9377 .m(1)
9378 .n(8)
9379 .k(16)
9380 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009381 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009382 }
9383#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9384
9385
9386#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard64ab1b72021-11-22 10:57:40 -08009387 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_eq_16) {
9388 TEST_REQUIRES_ARM_NEON;
9389 GemmMicrokernelTester()
9390 .mr(1)
9391 .nr(8)
9392 .kr(4)
9393 .sr(1)
9394 .m(1)
9395 .n(8)
9396 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08009397 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009398 }
9399
9400 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, strided_cn) {
9401 TEST_REQUIRES_ARM_NEON;
9402 GemmMicrokernelTester()
9403 .mr(1)
9404 .nr(8)
9405 .kr(4)
9406 .sr(1)
9407 .m(1)
9408 .n(8)
9409 .k(16)
9410 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009411 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009412 }
9413
9414 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile) {
9415 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009416 for (uint32_t n = 1; n <= 8; n++) {
9417 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08009418 GemmMicrokernelTester()
9419 .mr(1)
9420 .nr(8)
9421 .kr(4)
9422 .sr(1)
9423 .m(m)
9424 .n(n)
9425 .k(16)
9426 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009427 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009428 }
9429 }
9430 }
9431
9432 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
9433 TEST_REQUIRES_ARM_NEON;
9434 for (uint32_t m = 1; m <= 1; m++) {
9435 GemmMicrokernelTester()
9436 .mr(1)
9437 .nr(8)
9438 .kr(4)
9439 .sr(1)
9440 .m(m)
9441 .n(8)
9442 .k(16)
9443 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009444 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009445 }
9446 }
9447
9448 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
9449 TEST_REQUIRES_ARM_NEON;
9450 for (uint32_t n = 1; n <= 8; n++) {
9451 GemmMicrokernelTester()
9452 .mr(1)
9453 .nr(8)
9454 .kr(4)
9455 .sr(1)
9456 .m(1)
9457 .n(n)
9458 .k(16)
9459 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009460 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009461 }
9462 }
9463
9464 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_lt_16) {
9465 TEST_REQUIRES_ARM_NEON;
9466 for (size_t k = 1; k < 16; k++) {
9467 GemmMicrokernelTester()
9468 .mr(1)
9469 .nr(8)
9470 .kr(4)
9471 .sr(1)
9472 .m(1)
9473 .n(8)
9474 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009475 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009476 }
9477 }
9478
9479 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_lt_16_subtile) {
9480 TEST_REQUIRES_ARM_NEON;
9481 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009482 for (uint32_t n = 1; n <= 8; n++) {
9483 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08009484 GemmMicrokernelTester()
9485 .mr(1)
9486 .nr(8)
9487 .kr(4)
9488 .sr(1)
9489 .m(m)
9490 .n(n)
9491 .k(k)
9492 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009493 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009494 }
9495 }
9496 }
9497 }
9498
9499 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_gt_16) {
9500 TEST_REQUIRES_ARM_NEON;
9501 for (size_t k = 17; k < 32; k++) {
9502 GemmMicrokernelTester()
9503 .mr(1)
9504 .nr(8)
9505 .kr(4)
9506 .sr(1)
9507 .m(1)
9508 .n(8)
9509 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009510 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009511 }
9512 }
9513
9514 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_gt_16_subtile) {
9515 TEST_REQUIRES_ARM_NEON;
9516 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009517 for (uint32_t n = 1; n <= 8; n++) {
9518 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08009519 GemmMicrokernelTester()
9520 .mr(1)
9521 .nr(8)
9522 .kr(4)
9523 .sr(1)
9524 .m(m)
9525 .n(n)
9526 .k(k)
9527 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009528 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009529 }
9530 }
9531 }
9532 }
9533
9534 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_div_16) {
9535 TEST_REQUIRES_ARM_NEON;
9536 for (size_t k = 32; k <= 160; k += 16) {
9537 GemmMicrokernelTester()
9538 .mr(1)
9539 .nr(8)
9540 .kr(4)
9541 .sr(1)
9542 .m(1)
9543 .n(8)
9544 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009545 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009546 }
9547 }
9548
9549 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_div_16_subtile) {
9550 TEST_REQUIRES_ARM_NEON;
9551 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009552 for (uint32_t n = 1; n <= 8; n++) {
9553 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08009554 GemmMicrokernelTester()
9555 .mr(1)
9556 .nr(8)
9557 .kr(4)
9558 .sr(1)
9559 .m(m)
9560 .n(n)
9561 .k(k)
9562 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009563 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009564 }
9565 }
9566 }
9567 }
9568
9569 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_gt_8) {
9570 TEST_REQUIRES_ARM_NEON;
9571 for (uint32_t n = 9; n < 16; n++) {
9572 for (size_t k = 1; k <= 80; k += 17) {
9573 GemmMicrokernelTester()
9574 .mr(1)
9575 .nr(8)
9576 .kr(4)
9577 .sr(1)
9578 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009579 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -08009580 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009581 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009582 }
9583 }
9584 }
9585
9586 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
9587 TEST_REQUIRES_ARM_NEON;
9588 for (uint32_t n = 9; n < 16; n++) {
9589 for (size_t k = 1; k <= 80; k += 17) {
9590 GemmMicrokernelTester()
9591 .mr(1)
9592 .nr(8)
9593 .kr(4)
9594 .sr(1)
9595 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009596 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -08009597 .k(k)
9598 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009599 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009600 }
9601 }
9602 }
9603
9604 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_gt_8_subtile) {
9605 TEST_REQUIRES_ARM_NEON;
9606 for (uint32_t n = 9; n < 16; n++) {
9607 for (size_t k = 1; k <= 80; k += 17) {
9608 for (uint32_t m = 1; m <= 1; m++) {
9609 GemmMicrokernelTester()
9610 .mr(1)
9611 .nr(8)
9612 .kr(4)
9613 .sr(1)
9614 .m(m)
9615 .n(n)
9616 .k(k)
9617 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009618 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009619 }
9620 }
9621 }
9622 }
9623
9624 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_div_8) {
9625 TEST_REQUIRES_ARM_NEON;
9626 for (uint32_t n = 16; n <= 24; n += 8) {
9627 for (size_t k = 1; k <= 80; k += 17) {
9628 GemmMicrokernelTester()
9629 .mr(1)
9630 .nr(8)
9631 .kr(4)
9632 .sr(1)
9633 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009634 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -08009635 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009636 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009637 }
9638 }
9639 }
9640
9641 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_div_8_strided_cn) {
9642 TEST_REQUIRES_ARM_NEON;
9643 for (uint32_t n = 16; n <= 24; n += 8) {
9644 for (size_t k = 1; k <= 80; k += 17) {
9645 GemmMicrokernelTester()
9646 .mr(1)
9647 .nr(8)
9648 .kr(4)
9649 .sr(1)
9650 .m(1)
9651 .n(n)
9652 .k(k)
9653 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009654 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009655 }
9656 }
9657 }
9658
9659 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_div_8_subtile) {
9660 TEST_REQUIRES_ARM_NEON;
9661 for (uint32_t n = 16; n <= 24; n += 8) {
9662 for (size_t k = 1; k <= 80; k += 17) {
9663 for (uint32_t m = 1; m <= 1; m++) {
9664 GemmMicrokernelTester()
9665 .mr(1)
9666 .nr(8)
9667 .kr(4)
9668 .sr(1)
9669 .m(m)
9670 .n(n)
9671 .k(k)
9672 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009673 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009674 }
9675 }
9676 }
9677 }
9678
9679 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, small_kernel) {
9680 TEST_REQUIRES_ARM_NEON;
9681 for (size_t k = 1; k <= 80; k += 17) {
9682 GemmMicrokernelTester()
9683 .mr(1)
9684 .nr(8)
9685 .kr(4)
9686 .sr(1)
9687 .m(1)
9688 .n(8)
9689 .k(k)
9690 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009691 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009692 }
9693 }
9694
9695 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, small_kernel_subtile) {
9696 TEST_REQUIRES_ARM_NEON;
9697 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009698 for (uint32_t n = 1; n <= 8; n++) {
9699 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08009700 GemmMicrokernelTester()
9701 .mr(1)
9702 .nr(8)
9703 .kr(4)
9704 .sr(1)
9705 .m(m)
9706 .n(n)
9707 .k(k)
9708 .ks(3)
9709 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009710 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009711 }
9712 }
9713 }
9714 }
9715
9716 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_gt_8_small_kernel) {
9717 TEST_REQUIRES_ARM_NEON;
9718 for (uint32_t n = 9; n < 16; n++) {
9719 for (size_t k = 1; k <= 80; k += 17) {
9720 GemmMicrokernelTester()
9721 .mr(1)
9722 .nr(8)
9723 .kr(4)
9724 .sr(1)
9725 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009726 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -08009727 .k(k)
9728 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009729 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009730 }
9731 }
9732 }
9733
9734 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_div_8_small_kernel) {
9735 TEST_REQUIRES_ARM_NEON;
9736 for (uint32_t n = 16; n <= 24; n += 8) {
9737 for (size_t k = 1; k <= 80; k += 17) {
9738 GemmMicrokernelTester()
9739 .mr(1)
9740 .nr(8)
9741 .kr(4)
9742 .sr(1)
9743 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009744 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -08009745 .k(k)
9746 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009747 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009748 }
9749 }
9750 }
9751
9752 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, strided_cm_subtile) {
9753 TEST_REQUIRES_ARM_NEON;
9754 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009755 for (uint32_t n = 1; n <= 8; n++) {
9756 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08009757 GemmMicrokernelTester()
9758 .mr(1)
9759 .nr(8)
9760 .kr(4)
9761 .sr(1)
9762 .m(m)
9763 .n(n)
9764 .k(k)
9765 .cm_stride(11)
9766 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009767 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009768 }
9769 }
9770 }
9771 }
9772
9773 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, a_offset) {
9774 TEST_REQUIRES_ARM_NEON;
9775 for (size_t k = 1; k <= 80; k += 17) {
9776 GemmMicrokernelTester()
9777 .mr(1)
9778 .nr(8)
9779 .kr(4)
9780 .sr(1)
9781 .m(1)
9782 .n(8)
9783 .k(k)
9784 .ks(3)
9785 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08009786 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009787 }
9788 }
9789
9790 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, zero) {
9791 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009792 for (size_t k = 1; k <= 80; k += 17) {
9793 for (uint32_t mz = 0; mz < 1; mz++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08009794 GemmMicrokernelTester()
9795 .mr(1)
9796 .nr(8)
9797 .kr(4)
9798 .sr(1)
9799 .m(1)
9800 .n(8)
9801 .k(k)
9802 .ks(3)
9803 .a_offset(83)
9804 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08009805 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009806 }
9807 }
9808 }
9809
9810 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, qmin) {
9811 TEST_REQUIRES_ARM_NEON;
9812 GemmMicrokernelTester()
9813 .mr(1)
9814 .nr(8)
9815 .kr(4)
9816 .sr(1)
9817 .m(1)
9818 .n(8)
9819 .k(16)
9820 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009821 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009822 }
9823
9824 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, qmax) {
9825 TEST_REQUIRES_ARM_NEON;
9826 GemmMicrokernelTester()
9827 .mr(1)
9828 .nr(8)
9829 .kr(4)
9830 .sr(1)
9831 .m(1)
9832 .n(8)
9833 .k(16)
9834 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009835 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009836 }
9837
9838 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, strided_cm) {
9839 TEST_REQUIRES_ARM_NEON;
9840 GemmMicrokernelTester()
9841 .mr(1)
9842 .nr(8)
9843 .kr(4)
9844 .sr(1)
9845 .m(1)
9846 .n(8)
9847 .k(16)
9848 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009849 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009850 }
9851#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9852
9853
9854#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard64ab1b72021-11-22 10:57:40 -08009855 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_eq_16) {
9856 TEST_REQUIRES_ARM_NEON_V8;
9857 GemmMicrokernelTester()
9858 .mr(2)
9859 .nr(8)
9860 .kr(4)
9861 .sr(1)
9862 .m(2)
9863 .n(8)
9864 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08009865 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009866 }
9867
9868 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, strided_cn) {
9869 TEST_REQUIRES_ARM_NEON_V8;
9870 GemmMicrokernelTester()
9871 .mr(2)
9872 .nr(8)
9873 .kr(4)
9874 .sr(1)
9875 .m(2)
9876 .n(8)
9877 .k(16)
9878 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009879 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009880 }
9881
9882 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_eq_16_subtile) {
9883 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009884 for (uint32_t n = 1; n <= 8; n++) {
9885 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08009886 GemmMicrokernelTester()
9887 .mr(2)
9888 .nr(8)
9889 .kr(4)
9890 .sr(1)
9891 .m(m)
9892 .n(n)
9893 .k(16)
9894 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009895 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009896 }
9897 }
9898 }
9899
9900 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_eq_16_subtile_m) {
9901 TEST_REQUIRES_ARM_NEON_V8;
9902 for (uint32_t m = 1; m <= 2; m++) {
9903 GemmMicrokernelTester()
9904 .mr(2)
9905 .nr(8)
9906 .kr(4)
9907 .sr(1)
9908 .m(m)
9909 .n(8)
9910 .k(16)
9911 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009912 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009913 }
9914 }
9915
9916 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_eq_16_subtile_n) {
9917 TEST_REQUIRES_ARM_NEON_V8;
9918 for (uint32_t n = 1; n <= 8; n++) {
9919 GemmMicrokernelTester()
9920 .mr(2)
9921 .nr(8)
9922 .kr(4)
9923 .sr(1)
9924 .m(2)
9925 .n(n)
9926 .k(16)
9927 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009928 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009929 }
9930 }
9931
9932 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_lt_16) {
9933 TEST_REQUIRES_ARM_NEON_V8;
9934 for (size_t k = 1; k < 16; k++) {
9935 GemmMicrokernelTester()
9936 .mr(2)
9937 .nr(8)
9938 .kr(4)
9939 .sr(1)
9940 .m(2)
9941 .n(8)
9942 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009943 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009944 }
9945 }
9946
9947 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_lt_16_subtile) {
9948 TEST_REQUIRES_ARM_NEON_V8;
9949 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009950 for (uint32_t n = 1; n <= 8; n++) {
9951 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08009952 GemmMicrokernelTester()
9953 .mr(2)
9954 .nr(8)
9955 .kr(4)
9956 .sr(1)
9957 .m(m)
9958 .n(n)
9959 .k(k)
9960 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009961 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009962 }
9963 }
9964 }
9965 }
9966
9967 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_gt_16) {
9968 TEST_REQUIRES_ARM_NEON_V8;
9969 for (size_t k = 17; k < 32; k++) {
9970 GemmMicrokernelTester()
9971 .mr(2)
9972 .nr(8)
9973 .kr(4)
9974 .sr(1)
9975 .m(2)
9976 .n(8)
9977 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009978 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009979 }
9980 }
9981
9982 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_gt_16_subtile) {
9983 TEST_REQUIRES_ARM_NEON_V8;
9984 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009985 for (uint32_t n = 1; n <= 8; n++) {
9986 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08009987 GemmMicrokernelTester()
9988 .mr(2)
9989 .nr(8)
9990 .kr(4)
9991 .sr(1)
9992 .m(m)
9993 .n(n)
9994 .k(k)
9995 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009996 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08009997 }
9998 }
9999 }
10000 }
10001
10002 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_div_16) {
10003 TEST_REQUIRES_ARM_NEON_V8;
10004 for (size_t k = 32; k <= 160; k += 16) {
10005 GemmMicrokernelTester()
10006 .mr(2)
10007 .nr(8)
10008 .kr(4)
10009 .sr(1)
10010 .m(2)
10011 .n(8)
10012 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010013 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -080010014 }
10015 }
10016
10017 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_div_16_subtile) {
10018 TEST_REQUIRES_ARM_NEON_V8;
10019 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010020 for (uint32_t n = 1; n <= 8; n++) {
10021 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080010022 GemmMicrokernelTester()
10023 .mr(2)
10024 .nr(8)
10025 .kr(4)
10026 .sr(1)
10027 .m(m)
10028 .n(n)
10029 .k(k)
10030 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010031 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -080010032 }
10033 }
10034 }
10035 }
10036
10037 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_gt_8) {
10038 TEST_REQUIRES_ARM_NEON_V8;
10039 for (uint32_t n = 9; n < 16; n++) {
10040 for (size_t k = 1; k <= 80; k += 17) {
10041 GemmMicrokernelTester()
10042 .mr(2)
10043 .nr(8)
10044 .kr(4)
10045 .sr(1)
10046 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010047 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -080010048 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010049 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -080010050 }
10051 }
10052 }
10053
10054 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_gt_8_strided_cn) {
10055 TEST_REQUIRES_ARM_NEON_V8;
10056 for (uint32_t n = 9; n < 16; n++) {
10057 for (size_t k = 1; k <= 80; k += 17) {
10058 GemmMicrokernelTester()
10059 .mr(2)
10060 .nr(8)
10061 .kr(4)
10062 .sr(1)
10063 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010064 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -080010065 .k(k)
10066 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010067 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -080010068 }
10069 }
10070 }
10071
10072 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_gt_8_subtile) {
10073 TEST_REQUIRES_ARM_NEON_V8;
10074 for (uint32_t n = 9; n < 16; n++) {
10075 for (size_t k = 1; k <= 80; k += 17) {
10076 for (uint32_t m = 1; m <= 2; m++) {
10077 GemmMicrokernelTester()
10078 .mr(2)
10079 .nr(8)
10080 .kr(4)
10081 .sr(1)
10082 .m(m)
10083 .n(n)
10084 .k(k)
10085 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010086 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -080010087 }
10088 }
10089 }
10090 }
10091
10092 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_div_8) {
10093 TEST_REQUIRES_ARM_NEON_V8;
10094 for (uint32_t n = 16; n <= 24; n += 8) {
10095 for (size_t k = 1; k <= 80; k += 17) {
10096 GemmMicrokernelTester()
10097 .mr(2)
10098 .nr(8)
10099 .kr(4)
10100 .sr(1)
10101 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010102 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -080010103 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010104 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -080010105 }
10106 }
10107 }
10108
10109 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_div_8_strided_cn) {
10110 TEST_REQUIRES_ARM_NEON_V8;
10111 for (uint32_t n = 16; n <= 24; n += 8) {
10112 for (size_t k = 1; k <= 80; k += 17) {
10113 GemmMicrokernelTester()
10114 .mr(2)
10115 .nr(8)
10116 .kr(4)
10117 .sr(1)
10118 .m(2)
10119 .n(n)
10120 .k(k)
10121 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010122 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -080010123 }
10124 }
10125 }
10126
10127 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_div_8_subtile) {
10128 TEST_REQUIRES_ARM_NEON_V8;
10129 for (uint32_t n = 16; n <= 24; n += 8) {
10130 for (size_t k = 1; k <= 80; k += 17) {
10131 for (uint32_t m = 1; m <= 2; m++) {
10132 GemmMicrokernelTester()
10133 .mr(2)
10134 .nr(8)
10135 .kr(4)
10136 .sr(1)
10137 .m(m)
10138 .n(n)
10139 .k(k)
10140 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010141 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -080010142 }
10143 }
10144 }
10145 }
10146
10147 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, small_kernel) {
10148 TEST_REQUIRES_ARM_NEON_V8;
10149 for (size_t k = 1; k <= 80; k += 17) {
10150 GemmMicrokernelTester()
10151 .mr(2)
10152 .nr(8)
10153 .kr(4)
10154 .sr(1)
10155 .m(2)
10156 .n(8)
10157 .k(k)
10158 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010159 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -080010160 }
10161 }
10162
10163 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, small_kernel_subtile) {
10164 TEST_REQUIRES_ARM_NEON_V8;
10165 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010166 for (uint32_t n = 1; n <= 8; n++) {
10167 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080010168 GemmMicrokernelTester()
10169 .mr(2)
10170 .nr(8)
10171 .kr(4)
10172 .sr(1)
10173 .m(m)
10174 .n(n)
10175 .k(k)
10176 .ks(3)
10177 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010178 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -080010179 }
10180 }
10181 }
10182 }
10183
10184 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_gt_8_small_kernel) {
10185 TEST_REQUIRES_ARM_NEON_V8;
10186 for (uint32_t n = 9; n < 16; n++) {
10187 for (size_t k = 1; k <= 80; k += 17) {
10188 GemmMicrokernelTester()
10189 .mr(2)
10190 .nr(8)
10191 .kr(4)
10192 .sr(1)
10193 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010194 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -080010195 .k(k)
10196 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010197 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -080010198 }
10199 }
10200 }
10201
10202 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_div_8_small_kernel) {
10203 TEST_REQUIRES_ARM_NEON_V8;
10204 for (uint32_t n = 16; n <= 24; n += 8) {
10205 for (size_t k = 1; k <= 80; k += 17) {
10206 GemmMicrokernelTester()
10207 .mr(2)
10208 .nr(8)
10209 .kr(4)
10210 .sr(1)
10211 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010212 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -080010213 .k(k)
10214 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010215 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -080010216 }
10217 }
10218 }
10219
10220 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, strided_cm_subtile) {
10221 TEST_REQUIRES_ARM_NEON_V8;
10222 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010223 for (uint32_t n = 1; n <= 8; n++) {
10224 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080010225 GemmMicrokernelTester()
10226 .mr(2)
10227 .nr(8)
10228 .kr(4)
10229 .sr(1)
10230 .m(m)
10231 .n(n)
10232 .k(k)
10233 .cm_stride(11)
10234 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010235 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -080010236 }
10237 }
10238 }
10239 }
10240
10241 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, a_offset) {
10242 TEST_REQUIRES_ARM_NEON_V8;
10243 for (size_t k = 1; k <= 80; k += 17) {
10244 GemmMicrokernelTester()
10245 .mr(2)
10246 .nr(8)
10247 .kr(4)
10248 .sr(1)
10249 .m(2)
10250 .n(8)
10251 .k(k)
10252 .ks(3)
10253 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080010254 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -080010255 }
10256 }
10257
10258 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, zero) {
10259 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010260 for (size_t k = 1; k <= 80; k += 17) {
10261 for (uint32_t mz = 0; mz < 2; mz++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080010262 GemmMicrokernelTester()
10263 .mr(2)
10264 .nr(8)
10265 .kr(4)
10266 .sr(1)
10267 .m(2)
10268 .n(8)
10269 .k(k)
10270 .ks(3)
10271 .a_offset(163)
10272 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080010273 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -080010274 }
10275 }
10276 }
10277
10278 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, qmin) {
10279 TEST_REQUIRES_ARM_NEON_V8;
10280 GemmMicrokernelTester()
10281 .mr(2)
10282 .nr(8)
10283 .kr(4)
10284 .sr(1)
10285 .m(2)
10286 .n(8)
10287 .k(16)
10288 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010289 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -080010290 }
10291
10292 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, qmax) {
10293 TEST_REQUIRES_ARM_NEON_V8;
10294 GemmMicrokernelTester()
10295 .mr(2)
10296 .nr(8)
10297 .kr(4)
10298 .sr(1)
10299 .m(2)
10300 .n(8)
10301 .k(16)
10302 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010303 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -080010304 }
10305
10306 TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, strided_cm) {
10307 TEST_REQUIRES_ARM_NEON_V8;
10308 GemmMicrokernelTester()
10309 .mr(2)
10310 .nr(8)
10311 .kr(4)
10312 .sr(1)
10313 .m(2)
10314 .n(8)
10315 .k(16)
10316 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010317 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -080010318 }
10319#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10320
10321
10322#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharde22685a2021-11-12 11:36:58 -080010323 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_eq_16) {
Frank Barchard287952a2021-11-03 15:26:45 -070010324 TEST_REQUIRES_ARM_NEON;
10325 GemmMicrokernelTester()
10326 .mr(1)
10327 .nr(8)
10328 .kr(2)
10329 .sr(1)
10330 .m(1)
10331 .n(8)
10332 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080010333 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010334 }
10335
Frank Barcharde22685a2021-11-12 11:36:58 -080010336 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -070010337 TEST_REQUIRES_ARM_NEON;
10338 GemmMicrokernelTester()
10339 .mr(1)
10340 .nr(8)
10341 .kr(2)
10342 .sr(1)
10343 .m(1)
10344 .n(8)
10345 .k(16)
10346 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010347 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010348 }
10349
Frank Barcharde22685a2021-11-12 11:36:58 -080010350 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_eq_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010351 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010352 for (uint32_t n = 1; n <= 8; n++) {
10353 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010354 GemmMicrokernelTester()
10355 .mr(1)
10356 .nr(8)
10357 .kr(2)
10358 .sr(1)
10359 .m(m)
10360 .n(n)
10361 .k(16)
10362 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010363 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010364 }
10365 }
10366 }
10367
Frank Barcharde22685a2021-11-12 11:36:58 -080010368 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
Frank Barchard287952a2021-11-03 15:26:45 -070010369 TEST_REQUIRES_ARM_NEON;
10370 for (uint32_t m = 1; m <= 1; m++) {
10371 GemmMicrokernelTester()
10372 .mr(1)
10373 .nr(8)
10374 .kr(2)
10375 .sr(1)
10376 .m(m)
10377 .n(8)
10378 .k(16)
10379 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010380 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010381 }
10382 }
10383
Frank Barcharde22685a2021-11-12 11:36:58 -080010384 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
Frank Barchard287952a2021-11-03 15:26:45 -070010385 TEST_REQUIRES_ARM_NEON;
10386 for (uint32_t n = 1; n <= 8; n++) {
10387 GemmMicrokernelTester()
10388 .mr(1)
10389 .nr(8)
10390 .kr(2)
10391 .sr(1)
10392 .m(1)
10393 .n(n)
10394 .k(16)
10395 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010396 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010397 }
10398 }
10399
Frank Barcharde22685a2021-11-12 11:36:58 -080010400 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_lt_16) {
Frank Barchard287952a2021-11-03 15:26:45 -070010401 TEST_REQUIRES_ARM_NEON;
10402 for (size_t k = 1; k < 16; k++) {
10403 GemmMicrokernelTester()
10404 .mr(1)
10405 .nr(8)
10406 .kr(2)
10407 .sr(1)
10408 .m(1)
10409 .n(8)
10410 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010411 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010412 }
10413 }
10414
Frank Barcharde22685a2021-11-12 11:36:58 -080010415 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_lt_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010416 TEST_REQUIRES_ARM_NEON;
10417 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010418 for (uint32_t n = 1; n <= 8; n++) {
10419 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010420 GemmMicrokernelTester()
10421 .mr(1)
10422 .nr(8)
10423 .kr(2)
10424 .sr(1)
10425 .m(m)
10426 .n(n)
10427 .k(k)
10428 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010429 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010430 }
10431 }
10432 }
10433 }
10434
Frank Barcharde22685a2021-11-12 11:36:58 -080010435 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_gt_16) {
Frank Barchard287952a2021-11-03 15:26:45 -070010436 TEST_REQUIRES_ARM_NEON;
10437 for (size_t k = 17; k < 32; k++) {
10438 GemmMicrokernelTester()
10439 .mr(1)
10440 .nr(8)
10441 .kr(2)
10442 .sr(1)
10443 .m(1)
10444 .n(8)
10445 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010446 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010447 }
10448 }
10449
Frank Barcharde22685a2021-11-12 11:36:58 -080010450 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_gt_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010451 TEST_REQUIRES_ARM_NEON;
10452 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010453 for (uint32_t n = 1; n <= 8; n++) {
10454 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010455 GemmMicrokernelTester()
10456 .mr(1)
10457 .nr(8)
10458 .kr(2)
10459 .sr(1)
10460 .m(m)
10461 .n(n)
10462 .k(k)
10463 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010464 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010465 }
10466 }
10467 }
10468 }
10469
Frank Barcharde22685a2021-11-12 11:36:58 -080010470 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_div_16) {
Frank Barchard287952a2021-11-03 15:26:45 -070010471 TEST_REQUIRES_ARM_NEON;
10472 for (size_t k = 32; k <= 160; k += 16) {
10473 GemmMicrokernelTester()
10474 .mr(1)
10475 .nr(8)
10476 .kr(2)
10477 .sr(1)
10478 .m(1)
10479 .n(8)
10480 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010481 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010482 }
10483 }
10484
Frank Barcharde22685a2021-11-12 11:36:58 -080010485 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_div_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010486 TEST_REQUIRES_ARM_NEON;
10487 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010488 for (uint32_t n = 1; n <= 8; n++) {
10489 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010490 GemmMicrokernelTester()
10491 .mr(1)
10492 .nr(8)
10493 .kr(2)
10494 .sr(1)
10495 .m(m)
10496 .n(n)
10497 .k(k)
10498 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010499 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010500 }
10501 }
10502 }
10503 }
10504
Frank Barcharde22685a2021-11-12 11:36:58 -080010505 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_gt_8) {
Frank Barchard287952a2021-11-03 15:26:45 -070010506 TEST_REQUIRES_ARM_NEON;
10507 for (uint32_t n = 9; n < 16; n++) {
10508 for (size_t k = 1; k <= 80; k += 17) {
10509 GemmMicrokernelTester()
10510 .mr(1)
10511 .nr(8)
10512 .kr(2)
10513 .sr(1)
10514 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010515 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070010516 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010517 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010518 }
10519 }
10520 }
10521
Frank Barcharde22685a2021-11-12 11:36:58 -080010522 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_gt_8_strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -070010523 TEST_REQUIRES_ARM_NEON;
10524 for (uint32_t n = 9; n < 16; n++) {
10525 for (size_t k = 1; k <= 80; k += 17) {
10526 GemmMicrokernelTester()
10527 .mr(1)
10528 .nr(8)
10529 .kr(2)
10530 .sr(1)
10531 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010532 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070010533 .k(k)
10534 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010535 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010536 }
10537 }
10538 }
10539
Frank Barcharde22685a2021-11-12 11:36:58 -080010540 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_gt_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010541 TEST_REQUIRES_ARM_NEON;
10542 for (uint32_t n = 9; n < 16; n++) {
10543 for (size_t k = 1; k <= 80; k += 17) {
10544 for (uint32_t m = 1; m <= 1; m++) {
10545 GemmMicrokernelTester()
10546 .mr(1)
10547 .nr(8)
10548 .kr(2)
10549 .sr(1)
10550 .m(m)
10551 .n(n)
10552 .k(k)
10553 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010554 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010555 }
10556 }
10557 }
10558 }
10559
Frank Barcharde22685a2021-11-12 11:36:58 -080010560 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_div_8) {
Frank Barchard287952a2021-11-03 15:26:45 -070010561 TEST_REQUIRES_ARM_NEON;
10562 for (uint32_t n = 16; n <= 24; n += 8) {
10563 for (size_t k = 1; k <= 80; k += 17) {
10564 GemmMicrokernelTester()
10565 .mr(1)
10566 .nr(8)
10567 .kr(2)
10568 .sr(1)
10569 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010570 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070010571 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010572 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010573 }
10574 }
10575 }
10576
Frank Barcharde22685a2021-11-12 11:36:58 -080010577 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_div_8_strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -070010578 TEST_REQUIRES_ARM_NEON;
10579 for (uint32_t n = 16; n <= 24; n += 8) {
10580 for (size_t k = 1; k <= 80; k += 17) {
10581 GemmMicrokernelTester()
10582 .mr(1)
10583 .nr(8)
10584 .kr(2)
10585 .sr(1)
10586 .m(1)
10587 .n(n)
10588 .k(k)
10589 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010590 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010591 }
10592 }
10593 }
10594
Frank Barcharde22685a2021-11-12 11:36:58 -080010595 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_div_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010596 TEST_REQUIRES_ARM_NEON;
10597 for (uint32_t n = 16; n <= 24; n += 8) {
10598 for (size_t k = 1; k <= 80; k += 17) {
10599 for (uint32_t m = 1; m <= 1; m++) {
10600 GemmMicrokernelTester()
10601 .mr(1)
10602 .nr(8)
10603 .kr(2)
10604 .sr(1)
10605 .m(m)
10606 .n(n)
10607 .k(k)
10608 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010609 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010610 }
10611 }
10612 }
10613 }
10614
Frank Barcharde22685a2021-11-12 11:36:58 -080010615 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -070010616 TEST_REQUIRES_ARM_NEON;
10617 for (size_t k = 1; k <= 80; k += 17) {
10618 GemmMicrokernelTester()
10619 .mr(1)
10620 .nr(8)
10621 .kr(2)
10622 .sr(1)
10623 .m(1)
10624 .n(8)
10625 .k(k)
10626 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010627 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010628 }
10629 }
10630
Frank Barcharde22685a2021-11-12 11:36:58 -080010631 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, small_kernel_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010632 TEST_REQUIRES_ARM_NEON;
10633 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010634 for (uint32_t n = 1; n <= 8; n++) {
10635 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010636 GemmMicrokernelTester()
10637 .mr(1)
10638 .nr(8)
10639 .kr(2)
10640 .sr(1)
10641 .m(m)
10642 .n(n)
10643 .k(k)
10644 .ks(3)
10645 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010646 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010647 }
10648 }
10649 }
10650 }
10651
Frank Barcharde22685a2021-11-12 11:36:58 -080010652 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_gt_8_small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -070010653 TEST_REQUIRES_ARM_NEON;
10654 for (uint32_t n = 9; n < 16; n++) {
10655 for (size_t k = 1; k <= 80; k += 17) {
10656 GemmMicrokernelTester()
10657 .mr(1)
10658 .nr(8)
10659 .kr(2)
10660 .sr(1)
10661 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010662 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070010663 .k(k)
10664 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010665 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010666 }
10667 }
10668 }
10669
Frank Barcharde22685a2021-11-12 11:36:58 -080010670 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_div_8_small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -070010671 TEST_REQUIRES_ARM_NEON;
10672 for (uint32_t n = 16; n <= 24; n += 8) {
10673 for (size_t k = 1; k <= 80; k += 17) {
10674 GemmMicrokernelTester()
10675 .mr(1)
10676 .nr(8)
10677 .kr(2)
10678 .sr(1)
10679 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010680 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070010681 .k(k)
10682 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010683 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010684 }
10685 }
10686 }
10687
Frank Barcharde22685a2021-11-12 11:36:58 -080010688 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, strided_cm_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010689 TEST_REQUIRES_ARM_NEON;
10690 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010691 for (uint32_t n = 1; n <= 8; n++) {
10692 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010693 GemmMicrokernelTester()
10694 .mr(1)
10695 .nr(8)
10696 .kr(2)
10697 .sr(1)
10698 .m(m)
10699 .n(n)
10700 .k(k)
10701 .cm_stride(11)
10702 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010703 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010704 }
10705 }
10706 }
10707 }
10708
Frank Barcharde22685a2021-11-12 11:36:58 -080010709 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, a_offset) {
Frank Barchard287952a2021-11-03 15:26:45 -070010710 TEST_REQUIRES_ARM_NEON;
10711 for (size_t k = 1; k <= 80; k += 17) {
10712 GemmMicrokernelTester()
10713 .mr(1)
10714 .nr(8)
10715 .kr(2)
10716 .sr(1)
10717 .m(1)
10718 .n(8)
10719 .k(k)
10720 .ks(3)
10721 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080010722 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010723 }
10724 }
10725
Frank Barcharde22685a2021-11-12 11:36:58 -080010726 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, zero) {
Frank Barchard287952a2021-11-03 15:26:45 -070010727 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010728 for (size_t k = 1; k <= 80; k += 17) {
10729 for (uint32_t mz = 0; mz < 1; mz++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010730 GemmMicrokernelTester()
10731 .mr(1)
10732 .nr(8)
10733 .kr(2)
10734 .sr(1)
10735 .m(1)
10736 .n(8)
10737 .k(k)
10738 .ks(3)
10739 .a_offset(83)
10740 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080010741 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010742 }
10743 }
10744 }
10745
Frank Barcharde22685a2021-11-12 11:36:58 -080010746 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, qmin) {
Frank Barchard287952a2021-11-03 15:26:45 -070010747 TEST_REQUIRES_ARM_NEON;
10748 GemmMicrokernelTester()
10749 .mr(1)
10750 .nr(8)
10751 .kr(2)
10752 .sr(1)
10753 .m(1)
10754 .n(8)
10755 .k(16)
10756 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010757 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010758 }
10759
Frank Barcharde22685a2021-11-12 11:36:58 -080010760 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, qmax) {
Frank Barchard287952a2021-11-03 15:26:45 -070010761 TEST_REQUIRES_ARM_NEON;
10762 GemmMicrokernelTester()
10763 .mr(1)
10764 .nr(8)
10765 .kr(2)
10766 .sr(1)
10767 .m(1)
10768 .n(8)
10769 .k(16)
10770 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010771 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010772 }
10773
Frank Barcharde22685a2021-11-12 11:36:58 -080010774 TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, strided_cm) {
Frank Barchard287952a2021-11-03 15:26:45 -070010775 TEST_REQUIRES_ARM_NEON;
10776 GemmMicrokernelTester()
10777 .mr(1)
10778 .nr(8)
10779 .kr(2)
10780 .sr(1)
10781 .m(1)
10782 .n(8)
10783 .k(16)
10784 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010785 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010786 }
10787#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10788
10789
10790#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharde22685a2021-11-12 11:36:58 -080010791 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_eq_16) {
Frank Barchard287952a2021-11-03 15:26:45 -070010792 TEST_REQUIRES_ARM_NEON;
10793 GemmMicrokernelTester()
10794 .mr(2)
10795 .nr(8)
10796 .kr(2)
10797 .sr(1)
10798 .m(2)
10799 .n(8)
10800 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080010801 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010802 }
10803
Frank Barcharde22685a2021-11-12 11:36:58 -080010804 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -070010805 TEST_REQUIRES_ARM_NEON;
10806 GemmMicrokernelTester()
10807 .mr(2)
10808 .nr(8)
10809 .kr(2)
10810 .sr(1)
10811 .m(2)
10812 .n(8)
10813 .k(16)
10814 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010815 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010816 }
10817
Frank Barcharde22685a2021-11-12 11:36:58 -080010818 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_eq_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010819 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010820 for (uint32_t n = 1; n <= 8; n++) {
10821 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010822 GemmMicrokernelTester()
10823 .mr(2)
10824 .nr(8)
10825 .kr(2)
10826 .sr(1)
10827 .m(m)
10828 .n(n)
10829 .k(16)
10830 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010831 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010832 }
10833 }
10834 }
10835
Frank Barcharde22685a2021-11-12 11:36:58 -080010836 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
Frank Barchard287952a2021-11-03 15:26:45 -070010837 TEST_REQUIRES_ARM_NEON;
10838 for (uint32_t m = 1; m <= 2; m++) {
10839 GemmMicrokernelTester()
10840 .mr(2)
10841 .nr(8)
10842 .kr(2)
10843 .sr(1)
10844 .m(m)
10845 .n(8)
10846 .k(16)
10847 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010848 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010849 }
10850 }
10851
Frank Barcharde22685a2021-11-12 11:36:58 -080010852 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
Frank Barchard287952a2021-11-03 15:26:45 -070010853 TEST_REQUIRES_ARM_NEON;
10854 for (uint32_t n = 1; n <= 8; n++) {
10855 GemmMicrokernelTester()
10856 .mr(2)
10857 .nr(8)
10858 .kr(2)
10859 .sr(1)
10860 .m(2)
10861 .n(n)
10862 .k(16)
10863 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010864 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010865 }
10866 }
10867
Frank Barcharde22685a2021-11-12 11:36:58 -080010868 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_lt_16) {
Frank Barchard287952a2021-11-03 15:26:45 -070010869 TEST_REQUIRES_ARM_NEON;
10870 for (size_t k = 1; k < 16; k++) {
10871 GemmMicrokernelTester()
10872 .mr(2)
10873 .nr(8)
10874 .kr(2)
10875 .sr(1)
10876 .m(2)
10877 .n(8)
10878 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010879 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010880 }
10881 }
10882
Frank Barcharde22685a2021-11-12 11:36:58 -080010883 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_lt_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010884 TEST_REQUIRES_ARM_NEON;
10885 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010886 for (uint32_t n = 1; n <= 8; n++) {
10887 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010888 GemmMicrokernelTester()
10889 .mr(2)
10890 .nr(8)
10891 .kr(2)
10892 .sr(1)
10893 .m(m)
10894 .n(n)
10895 .k(k)
10896 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010897 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010898 }
10899 }
10900 }
10901 }
10902
Frank Barcharde22685a2021-11-12 11:36:58 -080010903 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_gt_16) {
Frank Barchard287952a2021-11-03 15:26:45 -070010904 TEST_REQUIRES_ARM_NEON;
10905 for (size_t k = 17; k < 32; k++) {
10906 GemmMicrokernelTester()
10907 .mr(2)
10908 .nr(8)
10909 .kr(2)
10910 .sr(1)
10911 .m(2)
10912 .n(8)
10913 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010914 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010915 }
10916 }
10917
Frank Barcharde22685a2021-11-12 11:36:58 -080010918 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_gt_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010919 TEST_REQUIRES_ARM_NEON;
10920 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010921 for (uint32_t n = 1; n <= 8; n++) {
10922 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010923 GemmMicrokernelTester()
10924 .mr(2)
10925 .nr(8)
10926 .kr(2)
10927 .sr(1)
10928 .m(m)
10929 .n(n)
10930 .k(k)
10931 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010932 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010933 }
10934 }
10935 }
10936 }
10937
Frank Barcharde22685a2021-11-12 11:36:58 -080010938 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_div_16) {
Frank Barchard287952a2021-11-03 15:26:45 -070010939 TEST_REQUIRES_ARM_NEON;
10940 for (size_t k = 32; k <= 160; k += 16) {
10941 GemmMicrokernelTester()
10942 .mr(2)
10943 .nr(8)
10944 .kr(2)
10945 .sr(1)
10946 .m(2)
10947 .n(8)
10948 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010949 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010950 }
10951 }
10952
Frank Barcharde22685a2021-11-12 11:36:58 -080010953 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_div_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010954 TEST_REQUIRES_ARM_NEON;
10955 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010956 for (uint32_t n = 1; n <= 8; n++) {
10957 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010958 GemmMicrokernelTester()
10959 .mr(2)
10960 .nr(8)
10961 .kr(2)
10962 .sr(1)
10963 .m(m)
10964 .n(n)
10965 .k(k)
10966 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010967 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010968 }
10969 }
10970 }
10971 }
10972
Frank Barcharde22685a2021-11-12 11:36:58 -080010973 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_gt_8) {
Frank Barchard287952a2021-11-03 15:26:45 -070010974 TEST_REQUIRES_ARM_NEON;
10975 for (uint32_t n = 9; n < 16; n++) {
10976 for (size_t k = 1; k <= 80; k += 17) {
10977 GemmMicrokernelTester()
10978 .mr(2)
10979 .nr(8)
10980 .kr(2)
10981 .sr(1)
10982 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010983 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070010984 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010985 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070010986 }
10987 }
10988 }
10989
Frank Barcharde22685a2021-11-12 11:36:58 -080010990 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_gt_8_strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -070010991 TEST_REQUIRES_ARM_NEON;
10992 for (uint32_t n = 9; n < 16; n++) {
10993 for (size_t k = 1; k <= 80; k += 17) {
10994 GemmMicrokernelTester()
10995 .mr(2)
10996 .nr(8)
10997 .kr(2)
10998 .sr(1)
10999 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011000 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070011001 .k(k)
11002 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011003 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070011004 }
11005 }
11006 }
11007
Frank Barcharde22685a2021-11-12 11:36:58 -080011008 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_gt_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070011009 TEST_REQUIRES_ARM_NEON;
11010 for (uint32_t n = 9; n < 16; n++) {
11011 for (size_t k = 1; k <= 80; k += 17) {
11012 for (uint32_t m = 1; m <= 2; m++) {
11013 GemmMicrokernelTester()
11014 .mr(2)
11015 .nr(8)
11016 .kr(2)
11017 .sr(1)
11018 .m(m)
11019 .n(n)
11020 .k(k)
11021 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011022 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070011023 }
11024 }
11025 }
11026 }
11027
Frank Barcharde22685a2021-11-12 11:36:58 -080011028 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_div_8) {
Frank Barchard287952a2021-11-03 15:26:45 -070011029 TEST_REQUIRES_ARM_NEON;
11030 for (uint32_t n = 16; n <= 24; n += 8) {
11031 for (size_t k = 1; k <= 80; k += 17) {
11032 GemmMicrokernelTester()
11033 .mr(2)
11034 .nr(8)
11035 .kr(2)
11036 .sr(1)
11037 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011038 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070011039 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011040 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070011041 }
11042 }
11043 }
11044
Frank Barcharde22685a2021-11-12 11:36:58 -080011045 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_div_8_strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -070011046 TEST_REQUIRES_ARM_NEON;
11047 for (uint32_t n = 16; n <= 24; n += 8) {
11048 for (size_t k = 1; k <= 80; k += 17) {
11049 GemmMicrokernelTester()
11050 .mr(2)
11051 .nr(8)
11052 .kr(2)
11053 .sr(1)
11054 .m(2)
11055 .n(n)
11056 .k(k)
11057 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011058 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070011059 }
11060 }
11061 }
11062
Frank Barcharde22685a2021-11-12 11:36:58 -080011063 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_div_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070011064 TEST_REQUIRES_ARM_NEON;
11065 for (uint32_t n = 16; n <= 24; n += 8) {
11066 for (size_t k = 1; k <= 80; k += 17) {
11067 for (uint32_t m = 1; m <= 2; m++) {
11068 GemmMicrokernelTester()
11069 .mr(2)
11070 .nr(8)
11071 .kr(2)
11072 .sr(1)
11073 .m(m)
11074 .n(n)
11075 .k(k)
11076 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011077 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070011078 }
11079 }
11080 }
11081 }
11082
Frank Barcharde22685a2021-11-12 11:36:58 -080011083 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -070011084 TEST_REQUIRES_ARM_NEON;
11085 for (size_t k = 1; k <= 80; k += 17) {
11086 GemmMicrokernelTester()
11087 .mr(2)
11088 .nr(8)
11089 .kr(2)
11090 .sr(1)
11091 .m(2)
11092 .n(8)
11093 .k(k)
11094 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011095 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070011096 }
11097 }
11098
Frank Barcharde22685a2021-11-12 11:36:58 -080011099 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, small_kernel_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070011100 TEST_REQUIRES_ARM_NEON;
11101 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011102 for (uint32_t n = 1; n <= 8; n++) {
11103 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070011104 GemmMicrokernelTester()
11105 .mr(2)
11106 .nr(8)
11107 .kr(2)
11108 .sr(1)
11109 .m(m)
11110 .n(n)
11111 .k(k)
11112 .ks(3)
11113 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011114 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070011115 }
11116 }
11117 }
11118 }
11119
Frank Barcharde22685a2021-11-12 11:36:58 -080011120 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_gt_8_small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -070011121 TEST_REQUIRES_ARM_NEON;
11122 for (uint32_t n = 9; n < 16; n++) {
11123 for (size_t k = 1; k <= 80; k += 17) {
11124 GemmMicrokernelTester()
11125 .mr(2)
11126 .nr(8)
11127 .kr(2)
11128 .sr(1)
11129 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011130 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070011131 .k(k)
11132 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011133 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070011134 }
11135 }
11136 }
11137
Frank Barcharde22685a2021-11-12 11:36:58 -080011138 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_div_8_small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -070011139 TEST_REQUIRES_ARM_NEON;
11140 for (uint32_t n = 16; n <= 24; n += 8) {
11141 for (size_t k = 1; k <= 80; k += 17) {
11142 GemmMicrokernelTester()
11143 .mr(2)
11144 .nr(8)
11145 .kr(2)
11146 .sr(1)
11147 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011148 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070011149 .k(k)
11150 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011151 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070011152 }
11153 }
11154 }
11155
Frank Barcharde22685a2021-11-12 11:36:58 -080011156 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, strided_cm_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070011157 TEST_REQUIRES_ARM_NEON;
11158 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011159 for (uint32_t n = 1; n <= 8; n++) {
11160 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070011161 GemmMicrokernelTester()
11162 .mr(2)
11163 .nr(8)
11164 .kr(2)
11165 .sr(1)
11166 .m(m)
11167 .n(n)
11168 .k(k)
11169 .cm_stride(11)
11170 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011171 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070011172 }
11173 }
11174 }
11175 }
11176
Frank Barcharde22685a2021-11-12 11:36:58 -080011177 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, a_offset) {
Frank Barchard287952a2021-11-03 15:26:45 -070011178 TEST_REQUIRES_ARM_NEON;
11179 for (size_t k = 1; k <= 80; k += 17) {
11180 GemmMicrokernelTester()
11181 .mr(2)
11182 .nr(8)
11183 .kr(2)
11184 .sr(1)
11185 .m(2)
11186 .n(8)
11187 .k(k)
11188 .ks(3)
11189 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080011190 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070011191 }
11192 }
11193
Frank Barcharde22685a2021-11-12 11:36:58 -080011194 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, zero) {
Frank Barchard287952a2021-11-03 15:26:45 -070011195 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011196 for (size_t k = 1; k <= 80; k += 17) {
11197 for (uint32_t mz = 0; mz < 2; mz++) {
Frank Barchard287952a2021-11-03 15:26:45 -070011198 GemmMicrokernelTester()
11199 .mr(2)
11200 .nr(8)
11201 .kr(2)
11202 .sr(1)
11203 .m(2)
11204 .n(8)
11205 .k(k)
11206 .ks(3)
11207 .a_offset(163)
11208 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080011209 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070011210 }
11211 }
11212 }
11213
Frank Barcharde22685a2021-11-12 11:36:58 -080011214 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, qmin) {
Frank Barchard287952a2021-11-03 15:26:45 -070011215 TEST_REQUIRES_ARM_NEON;
11216 GemmMicrokernelTester()
11217 .mr(2)
11218 .nr(8)
11219 .kr(2)
11220 .sr(1)
11221 .m(2)
11222 .n(8)
11223 .k(16)
11224 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011225 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070011226 }
11227
Frank Barcharde22685a2021-11-12 11:36:58 -080011228 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, qmax) {
Frank Barchard287952a2021-11-03 15:26:45 -070011229 TEST_REQUIRES_ARM_NEON;
11230 GemmMicrokernelTester()
11231 .mr(2)
11232 .nr(8)
11233 .kr(2)
11234 .sr(1)
11235 .m(2)
11236 .n(8)
11237 .k(16)
11238 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011239 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070011240 }
11241
Frank Barcharde22685a2021-11-12 11:36:58 -080011242 TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, strided_cm) {
Frank Barchard287952a2021-11-03 15:26:45 -070011243 TEST_REQUIRES_ARM_NEON;
11244 GemmMicrokernelTester()
11245 .mr(2)
11246 .nr(8)
11247 .kr(2)
11248 .sr(1)
11249 .m(2)
11250 .n(8)
11251 .k(16)
11252 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011253 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -070011254 }
11255#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11256
11257
Frank Barcharde4d3f762021-12-23 15:31:43 -080011258#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barcharde22685a2021-11-12 11:36:58 -080011259 TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_eq_16) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011260 TEST_REQUIRES_ARM_NEON;
11261 GemmMicrokernelTester()
11262 .mr(2)
11263 .nr(8)
11264 .kr(16)
11265 .sr(1)
11266 .m(2)
11267 .n(8)
11268 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080011269 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1663c0c2021-07-01 11:20:06 -070011270 }
11271
Frank Barcharde22685a2021-11-12 11:36:58 -080011272 TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, strided_cn) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011273 TEST_REQUIRES_ARM_NEON;
11274 GemmMicrokernelTester()
11275 .mr(2)
11276 .nr(8)
11277 .kr(16)
11278 .sr(1)
11279 .m(2)
11280 .n(8)
11281 .k(16)
11282 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011283 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1663c0c2021-07-01 11:20:06 -070011284 }
11285
Frank Barcharde22685a2021-11-12 11:36:58 -080011286 TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_eq_16_subtile) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011287 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011288 for (uint32_t n = 1; n <= 8; n++) {
11289 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011290 GemmMicrokernelTester()
11291 .mr(2)
11292 .nr(8)
11293 .kr(16)
11294 .sr(1)
11295 .m(m)
11296 .n(n)
11297 .k(16)
11298 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011299 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1663c0c2021-07-01 11:20:06 -070011300 }
11301 }
11302 }
11303
Frank Barcharde22685a2021-11-12 11:36:58 -080011304 TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_eq_16_subtile_m) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011305 TEST_REQUIRES_ARM_NEON;
11306 for (uint32_t m = 1; m <= 2; m++) {
11307 GemmMicrokernelTester()
11308 .mr(2)
11309 .nr(8)
11310 .kr(16)
11311 .sr(1)
11312 .m(m)
11313 .n(8)
11314 .k(16)
11315 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011316 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1663c0c2021-07-01 11:20:06 -070011317 }
11318 }
11319
Frank Barcharde22685a2021-11-12 11:36:58 -080011320 TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_eq_16_subtile_n) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011321 TEST_REQUIRES_ARM_NEON;
11322 for (uint32_t n = 1; n <= 8; n++) {
11323 GemmMicrokernelTester()
11324 .mr(2)
11325 .nr(8)
11326 .kr(16)
11327 .sr(1)
11328 .m(2)
11329 .n(n)
11330 .k(16)
11331 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011332 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1663c0c2021-07-01 11:20:06 -070011333 }
11334 }
11335
Frank Barcharde22685a2021-11-12 11:36:58 -080011336 TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_lt_16) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011337 TEST_REQUIRES_ARM_NEON;
11338 for (size_t k = 1; k < 16; k++) {
11339 GemmMicrokernelTester()
11340 .mr(2)
11341 .nr(8)
11342 .kr(16)
11343 .sr(1)
11344 .m(2)
11345 .n(8)
11346 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011347 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1663c0c2021-07-01 11:20:06 -070011348 }
11349 }
11350
Frank Barcharde22685a2021-11-12 11:36:58 -080011351 TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_lt_16_subtile) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011352 TEST_REQUIRES_ARM_NEON;
11353 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011354 for (uint32_t n = 1; n <= 8; n++) {
11355 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011356 GemmMicrokernelTester()
11357 .mr(2)
11358 .nr(8)
11359 .kr(16)
11360 .sr(1)
11361 .m(m)
11362 .n(n)
11363 .k(k)
11364 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011365 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1663c0c2021-07-01 11:20:06 -070011366 }
11367 }
11368 }
11369 }
11370
Frank Barcharde22685a2021-11-12 11:36:58 -080011371 TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_gt_16) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011372 TEST_REQUIRES_ARM_NEON;
11373 for (size_t k = 17; k < 32; k++) {
11374 GemmMicrokernelTester()
11375 .mr(2)
11376 .nr(8)
11377 .kr(16)
11378 .sr(1)
11379 .m(2)
11380 .n(8)
11381 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011382 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1663c0c2021-07-01 11:20:06 -070011383 }
11384 }
11385
Frank Barcharde22685a2021-11-12 11:36:58 -080011386 TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_gt_16_subtile) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011387 TEST_REQUIRES_ARM_NEON;
11388 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011389 for (uint32_t n = 1; n <= 8; n++) {
11390 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011391 GemmMicrokernelTester()
11392 .mr(2)
11393 .nr(8)
11394 .kr(16)
11395 .sr(1)
11396 .m(m)
11397 .n(n)
11398 .k(k)
11399 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011400 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1663c0c2021-07-01 11:20:06 -070011401 }
11402 }
11403 }
11404 }
11405
Frank Barcharde22685a2021-11-12 11:36:58 -080011406 TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_div_16) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011407 TEST_REQUIRES_ARM_NEON;
11408 for (size_t k = 32; k <= 160; k += 16) {
11409 GemmMicrokernelTester()
11410 .mr(2)
11411 .nr(8)
11412 .kr(16)
11413 .sr(1)
11414 .m(2)
11415 .n(8)
11416 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011417 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1663c0c2021-07-01 11:20:06 -070011418 }
11419 }
11420
Frank Barcharde22685a2021-11-12 11:36:58 -080011421 TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_div_16_subtile) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011422 TEST_REQUIRES_ARM_NEON;
11423 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011424 for (uint32_t n = 1; n <= 8; n++) {
11425 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011426 GemmMicrokernelTester()
11427 .mr(2)
11428 .nr(8)
11429 .kr(16)
11430 .sr(1)
11431 .m(m)
11432 .n(n)
11433 .k(k)
11434 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011435 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1663c0c2021-07-01 11:20:06 -070011436 }
11437 }
11438 }
11439 }
11440
Frank Barcharde22685a2021-11-12 11:36:58 -080011441 TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_gt_8) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011442 TEST_REQUIRES_ARM_NEON;
11443 for (uint32_t n = 9; n < 16; n++) {
11444 for (size_t k = 1; k <= 80; k += 17) {
11445 GemmMicrokernelTester()
11446 .mr(2)
11447 .nr(8)
11448 .kr(16)
11449 .sr(1)
11450 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011451 .n(n)
Frank Barchard1663c0c2021-07-01 11:20:06 -070011452 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011453 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1663c0c2021-07-01 11:20:06 -070011454 }
11455 }
11456 }
11457
Frank Barcharde22685a2021-11-12 11:36:58 -080011458 TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_gt_8_strided_cn) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011459 TEST_REQUIRES_ARM_NEON;
11460 for (uint32_t n = 9; n < 16; n++) {
11461 for (size_t k = 1; k <= 80; k += 17) {
11462 GemmMicrokernelTester()
11463 .mr(2)
11464 .nr(8)
11465 .kr(16)
11466 .sr(1)
11467 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011468 .n(n)
Frank Barchard1663c0c2021-07-01 11:20:06 -070011469 .k(k)
11470 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011471 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1663c0c2021-07-01 11:20:06 -070011472 }
11473 }
11474 }
11475
Frank Barcharde22685a2021-11-12 11:36:58 -080011476 TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_gt_8_subtile) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011477 TEST_REQUIRES_ARM_NEON;
11478 for (uint32_t n = 9; n < 16; n++) {
11479 for (size_t k = 1; k <= 80; k += 17) {
11480 for (uint32_t m = 1; m <= 2; m++) {
11481 GemmMicrokernelTester()
11482 .mr(2)
11483 .nr(8)
11484 .kr(16)
11485 .sr(1)
11486 .m(m)
11487 .n(n)
11488 .k(k)
11489 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011490 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1663c0c2021-07-01 11:20:06 -070011491 }
11492 }
11493 }
11494 }
11495
Frank Barcharde22685a2021-11-12 11:36:58 -080011496 TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_div_8) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011497 TEST_REQUIRES_ARM_NEON;
11498 for (uint32_t n = 16; n <= 24; n += 8) {
11499 for (size_t k = 1; k <= 80; k += 17) {
11500 GemmMicrokernelTester()
11501 .mr(2)
11502 .nr(8)
11503 .kr(16)
11504 .sr(1)
11505 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011506 .n(n)
Frank Barchard1663c0c2021-07-01 11:20:06 -070011507 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011508 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1663c0c2021-07-01 11:20:06 -070011509 }
11510 }
11511 }
11512
Frank Barcharde22685a2021-11-12 11:36:58 -080011513 TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_div_8_strided_cn) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011514 TEST_REQUIRES_ARM_NEON;
11515 for (uint32_t n = 16; n <= 24; n += 8) {
11516 for (size_t k = 1; k <= 80; k += 17) {
11517 GemmMicrokernelTester()
11518 .mr(2)
11519 .nr(8)
11520 .kr(16)
11521 .sr(1)
11522 .m(2)
11523 .n(n)
11524 .k(k)
11525 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011526 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1663c0c2021-07-01 11:20:06 -070011527 }
11528 }
11529 }
11530
Frank Barcharde22685a2021-11-12 11:36:58 -080011531 TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_div_8_subtile) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011532 TEST_REQUIRES_ARM_NEON;
11533 for (uint32_t n = 16; n <= 24; n += 8) {
11534 for (size_t k = 1; k <= 80; k += 17) {
11535 for (uint32_t m = 1; m <= 2; m++) {
11536 GemmMicrokernelTester()
11537 .mr(2)
11538 .nr(8)
11539 .kr(16)
11540 .sr(1)
11541 .m(m)
11542 .n(n)
11543 .k(k)
11544 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011545 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1663c0c2021-07-01 11:20:06 -070011546 }
11547 }
11548 }
11549 }
11550
Frank Barcharde22685a2021-11-12 11:36:58 -080011551 TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, small_kernel) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011552 TEST_REQUIRES_ARM_NEON;
11553 for (size_t k = 1; k <= 80; k += 17) {
11554 GemmMicrokernelTester()
11555 .mr(2)
11556 .nr(8)
11557 .kr(16)
11558 .sr(1)
11559 .m(2)
11560 .n(8)
11561 .k(k)
11562 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011563 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1663c0c2021-07-01 11:20:06 -070011564 }
11565 }
11566
Frank Barcharde22685a2021-11-12 11:36:58 -080011567 TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, small_kernel_subtile) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011568 TEST_REQUIRES_ARM_NEON;
11569 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011570 for (uint32_t n = 1; n <= 8; n++) {
11571 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011572 GemmMicrokernelTester()
11573 .mr(2)
11574 .nr(8)
11575 .kr(16)
11576 .sr(1)
11577 .m(m)
11578 .n(n)
11579 .k(k)
11580 .ks(3)
11581 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011582 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1663c0c2021-07-01 11:20:06 -070011583 }
11584 }
11585 }
11586 }
11587
Frank Barcharde22685a2021-11-12 11:36:58 -080011588 TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_gt_8_small_kernel) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011589 TEST_REQUIRES_ARM_NEON;
11590 for (uint32_t n = 9; n < 16; n++) {
11591 for (size_t k = 1; k <= 80; k += 17) {
11592 GemmMicrokernelTester()
11593 .mr(2)
11594 .nr(8)
11595 .kr(16)
11596 .sr(1)
11597 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011598 .n(n)
Frank Barchard1663c0c2021-07-01 11:20:06 -070011599 .k(k)
11600 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011601 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1663c0c2021-07-01 11:20:06 -070011602 }
11603 }
11604 }
11605
Frank Barcharde22685a2021-11-12 11:36:58 -080011606 TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_div_8_small_kernel) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011607 TEST_REQUIRES_ARM_NEON;
11608 for (uint32_t n = 16; n <= 24; n += 8) {
11609 for (size_t k = 1; k <= 80; k += 17) {
11610 GemmMicrokernelTester()
11611 .mr(2)
11612 .nr(8)
11613 .kr(16)
11614 .sr(1)
11615 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011616 .n(n)
Frank Barchard1663c0c2021-07-01 11:20:06 -070011617 .k(k)
11618 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011619 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1663c0c2021-07-01 11:20:06 -070011620 }
11621 }
11622 }
11623
Frank Barcharde22685a2021-11-12 11:36:58 -080011624 TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, strided_cm_subtile) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011625 TEST_REQUIRES_ARM_NEON;
11626 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011627 for (uint32_t n = 1; n <= 8; n++) {
11628 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011629 GemmMicrokernelTester()
11630 .mr(2)
11631 .nr(8)
11632 .kr(16)
11633 .sr(1)
11634 .m(m)
11635 .n(n)
11636 .k(k)
11637 .cm_stride(11)
11638 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011639 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1663c0c2021-07-01 11:20:06 -070011640 }
11641 }
11642 }
11643 }
11644
Frank Barcharde22685a2021-11-12 11:36:58 -080011645 TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, a_offset) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011646 TEST_REQUIRES_ARM_NEON;
11647 for (size_t k = 1; k <= 80; k += 17) {
11648 GemmMicrokernelTester()
11649 .mr(2)
11650 .nr(8)
11651 .kr(16)
11652 .sr(1)
11653 .m(2)
11654 .n(8)
11655 .k(k)
11656 .ks(3)
11657 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080011658 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1663c0c2021-07-01 11:20:06 -070011659 }
11660 }
11661
Frank Barcharde22685a2021-11-12 11:36:58 -080011662 TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, zero) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011663 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011664 for (size_t k = 1; k <= 80; k += 17) {
11665 for (uint32_t mz = 0; mz < 2; mz++) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011666 GemmMicrokernelTester()
11667 .mr(2)
11668 .nr(8)
11669 .kr(16)
11670 .sr(1)
11671 .m(2)
11672 .n(8)
11673 .k(k)
11674 .ks(3)
11675 .a_offset(163)
11676 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080011677 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1663c0c2021-07-01 11:20:06 -070011678 }
11679 }
11680 }
11681
Frank Barcharde22685a2021-11-12 11:36:58 -080011682 TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, qmin) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011683 TEST_REQUIRES_ARM_NEON;
11684 GemmMicrokernelTester()
11685 .mr(2)
11686 .nr(8)
11687 .kr(16)
11688 .sr(1)
11689 .m(2)
11690 .n(8)
11691 .k(16)
11692 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011693 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1663c0c2021-07-01 11:20:06 -070011694 }
11695
Frank Barcharde22685a2021-11-12 11:36:58 -080011696 TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, qmax) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011697 TEST_REQUIRES_ARM_NEON;
11698 GemmMicrokernelTester()
11699 .mr(2)
11700 .nr(8)
11701 .kr(16)
11702 .sr(1)
11703 .m(2)
11704 .n(8)
11705 .k(16)
11706 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011707 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1663c0c2021-07-01 11:20:06 -070011708 }
11709
Frank Barcharde22685a2021-11-12 11:36:58 -080011710 TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, strided_cm) {
Frank Barchard1663c0c2021-07-01 11:20:06 -070011711 TEST_REQUIRES_ARM_NEON;
11712 GemmMicrokernelTester()
11713 .mr(2)
11714 .nr(8)
11715 .kr(16)
11716 .sr(1)
11717 .m(2)
11718 .n(8)
11719 .k(16)
11720 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011721 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1663c0c2021-07-01 11:20:06 -070011722 }
Frank Barcharde4d3f762021-12-23 15:31:43 -080011723#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard1663c0c2021-07-01 11:20:06 -070011724
11725
Frank Barcharde4d3f762021-12-23 15:31:43 -080011726#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barcharde22685a2021-11-12 11:36:58 -080011727 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16) {
Frank Barchard960ae342021-07-01 11:31:11 -070011728 TEST_REQUIRES_ARM_NEON;
11729 GemmMicrokernelTester()
11730 .mr(1)
11731 .nr(8)
11732 .kr(8)
11733 .sr(1)
11734 .m(1)
11735 .n(8)
11736 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080011737 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -070011738 }
11739
Frank Barcharde22685a2021-11-12 11:36:58 -080011740 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cn) {
Frank Barchard960ae342021-07-01 11:31:11 -070011741 TEST_REQUIRES_ARM_NEON;
11742 GemmMicrokernelTester()
11743 .mr(1)
11744 .nr(8)
11745 .kr(8)
11746 .sr(1)
11747 .m(1)
11748 .n(8)
11749 .k(16)
11750 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011751 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -070011752 }
11753
Frank Barcharde22685a2021-11-12 11:36:58 -080011754 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -070011755 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011756 for (uint32_t n = 1; n <= 8; n++) {
11757 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard960ae342021-07-01 11:31:11 -070011758 GemmMicrokernelTester()
11759 .mr(1)
11760 .nr(8)
11761 .kr(8)
11762 .sr(1)
11763 .m(m)
11764 .n(n)
11765 .k(16)
11766 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011767 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -070011768 }
11769 }
11770 }
11771
Frank Barcharde22685a2021-11-12 11:36:58 -080011772 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_m) {
Frank Barchard960ae342021-07-01 11:31:11 -070011773 TEST_REQUIRES_ARM_NEON;
11774 for (uint32_t m = 1; m <= 1; m++) {
11775 GemmMicrokernelTester()
11776 .mr(1)
11777 .nr(8)
11778 .kr(8)
11779 .sr(1)
11780 .m(m)
11781 .n(8)
11782 .k(16)
11783 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011784 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -070011785 }
11786 }
11787
Frank Barcharde22685a2021-11-12 11:36:58 -080011788 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_n) {
Frank Barchard960ae342021-07-01 11:31:11 -070011789 TEST_REQUIRES_ARM_NEON;
11790 for (uint32_t n = 1; n <= 8; n++) {
11791 GemmMicrokernelTester()
11792 .mr(1)
11793 .nr(8)
11794 .kr(8)
11795 .sr(1)
11796 .m(1)
11797 .n(n)
11798 .k(16)
11799 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011800 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -070011801 }
11802 }
11803
Frank Barcharde22685a2021-11-12 11:36:58 -080011804 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16) {
Frank Barchard960ae342021-07-01 11:31:11 -070011805 TEST_REQUIRES_ARM_NEON;
11806 for (size_t k = 1; k < 16; k++) {
11807 GemmMicrokernelTester()
11808 .mr(1)
11809 .nr(8)
11810 .kr(8)
11811 .sr(1)
11812 .m(1)
11813 .n(8)
11814 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011815 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -070011816 }
11817 }
11818
Frank Barcharde22685a2021-11-12 11:36:58 -080011819 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -070011820 TEST_REQUIRES_ARM_NEON;
11821 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011822 for (uint32_t n = 1; n <= 8; n++) {
11823 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard960ae342021-07-01 11:31:11 -070011824 GemmMicrokernelTester()
11825 .mr(1)
11826 .nr(8)
11827 .kr(8)
11828 .sr(1)
11829 .m(m)
11830 .n(n)
11831 .k(k)
11832 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011833 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -070011834 }
11835 }
11836 }
11837 }
11838
Frank Barcharde22685a2021-11-12 11:36:58 -080011839 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16) {
Frank Barchard960ae342021-07-01 11:31:11 -070011840 TEST_REQUIRES_ARM_NEON;
11841 for (size_t k = 17; k < 32; k++) {
11842 GemmMicrokernelTester()
11843 .mr(1)
11844 .nr(8)
11845 .kr(8)
11846 .sr(1)
11847 .m(1)
11848 .n(8)
11849 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011850 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -070011851 }
11852 }
11853
Frank Barcharde22685a2021-11-12 11:36:58 -080011854 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -070011855 TEST_REQUIRES_ARM_NEON;
11856 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011857 for (uint32_t n = 1; n <= 8; n++) {
11858 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard960ae342021-07-01 11:31:11 -070011859 GemmMicrokernelTester()
11860 .mr(1)
11861 .nr(8)
11862 .kr(8)
11863 .sr(1)
11864 .m(m)
11865 .n(n)
11866 .k(k)
11867 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011868 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -070011869 }
11870 }
11871 }
11872 }
11873
Frank Barcharde22685a2021-11-12 11:36:58 -080011874 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16) {
Frank Barchard960ae342021-07-01 11:31:11 -070011875 TEST_REQUIRES_ARM_NEON;
11876 for (size_t k = 32; k <= 160; k += 16) {
11877 GemmMicrokernelTester()
11878 .mr(1)
11879 .nr(8)
11880 .kr(8)
11881 .sr(1)
11882 .m(1)
11883 .n(8)
11884 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011885 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -070011886 }
11887 }
11888
Frank Barcharde22685a2021-11-12 11:36:58 -080011889 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -070011890 TEST_REQUIRES_ARM_NEON;
11891 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011892 for (uint32_t n = 1; n <= 8; n++) {
11893 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard960ae342021-07-01 11:31:11 -070011894 GemmMicrokernelTester()
11895 .mr(1)
11896 .nr(8)
11897 .kr(8)
11898 .sr(1)
11899 .m(m)
11900 .n(n)
11901 .k(k)
11902 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011903 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -070011904 }
11905 }
11906 }
11907 }
11908
Frank Barcharde22685a2021-11-12 11:36:58 -080011909 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8) {
Frank Barchard960ae342021-07-01 11:31:11 -070011910 TEST_REQUIRES_ARM_NEON;
11911 for (uint32_t n = 9; n < 16; n++) {
11912 for (size_t k = 1; k <= 80; k += 17) {
11913 GemmMicrokernelTester()
11914 .mr(1)
11915 .nr(8)
11916 .kr(8)
11917 .sr(1)
11918 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011919 .n(n)
Frank Barchard960ae342021-07-01 11:31:11 -070011920 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011921 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -070011922 }
11923 }
11924 }
11925
Frank Barcharde22685a2021-11-12 11:36:58 -080011926 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_strided_cn) {
Frank Barchard960ae342021-07-01 11:31:11 -070011927 TEST_REQUIRES_ARM_NEON;
11928 for (uint32_t n = 9; n < 16; n++) {
11929 for (size_t k = 1; k <= 80; k += 17) {
11930 GemmMicrokernelTester()
11931 .mr(1)
11932 .nr(8)
11933 .kr(8)
11934 .sr(1)
11935 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011936 .n(n)
Frank Barchard960ae342021-07-01 11:31:11 -070011937 .k(k)
11938 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011939 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -070011940 }
11941 }
11942 }
11943
Frank Barcharde22685a2021-11-12 11:36:58 -080011944 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -070011945 TEST_REQUIRES_ARM_NEON;
11946 for (uint32_t n = 9; n < 16; n++) {
11947 for (size_t k = 1; k <= 80; k += 17) {
11948 for (uint32_t m = 1; m <= 1; m++) {
11949 GemmMicrokernelTester()
11950 .mr(1)
11951 .nr(8)
11952 .kr(8)
11953 .sr(1)
11954 .m(m)
11955 .n(n)
11956 .k(k)
11957 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011958 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -070011959 }
11960 }
11961 }
11962 }
11963
Frank Barcharde22685a2021-11-12 11:36:58 -080011964 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8) {
Frank Barchard960ae342021-07-01 11:31:11 -070011965 TEST_REQUIRES_ARM_NEON;
11966 for (uint32_t n = 16; n <= 24; n += 8) {
11967 for (size_t k = 1; k <= 80; k += 17) {
11968 GemmMicrokernelTester()
11969 .mr(1)
11970 .nr(8)
11971 .kr(8)
11972 .sr(1)
11973 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011974 .n(n)
Frank Barchard960ae342021-07-01 11:31:11 -070011975 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011976 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -070011977 }
11978 }
11979 }
11980
Frank Barcharde22685a2021-11-12 11:36:58 -080011981 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_strided_cn) {
Frank Barchard960ae342021-07-01 11:31:11 -070011982 TEST_REQUIRES_ARM_NEON;
11983 for (uint32_t n = 16; n <= 24; n += 8) {
11984 for (size_t k = 1; k <= 80; k += 17) {
11985 GemmMicrokernelTester()
11986 .mr(1)
11987 .nr(8)
11988 .kr(8)
11989 .sr(1)
11990 .m(1)
11991 .n(n)
11992 .k(k)
11993 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011994 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -070011995 }
11996 }
11997 }
11998
Frank Barcharde22685a2021-11-12 11:36:58 -080011999 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -070012000 TEST_REQUIRES_ARM_NEON;
12001 for (uint32_t n = 16; n <= 24; n += 8) {
12002 for (size_t k = 1; k <= 80; k += 17) {
12003 for (uint32_t m = 1; m <= 1; m++) {
12004 GemmMicrokernelTester()
12005 .mr(1)
12006 .nr(8)
12007 .kr(8)
12008 .sr(1)
12009 .m(m)
12010 .n(n)
12011 .k(k)
12012 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012013 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -070012014 }
12015 }
12016 }
12017 }
12018
Frank Barcharde22685a2021-11-12 11:36:58 -080012019 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, small_kernel) {
Frank Barchard960ae342021-07-01 11:31:11 -070012020 TEST_REQUIRES_ARM_NEON;
12021 for (size_t k = 1; k <= 80; k += 17) {
12022 GemmMicrokernelTester()
12023 .mr(1)
12024 .nr(8)
12025 .kr(8)
12026 .sr(1)
12027 .m(1)
12028 .n(8)
12029 .k(k)
12030 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012031 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -070012032 }
12033 }
12034
Frank Barcharde22685a2021-11-12 11:36:58 -080012035 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, small_kernel_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -070012036 TEST_REQUIRES_ARM_NEON;
12037 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012038 for (uint32_t n = 1; n <= 8; n++) {
12039 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard960ae342021-07-01 11:31:11 -070012040 GemmMicrokernelTester()
12041 .mr(1)
12042 .nr(8)
12043 .kr(8)
12044 .sr(1)
12045 .m(m)
12046 .n(n)
12047 .k(k)
12048 .ks(3)
12049 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012050 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -070012051 }
12052 }
12053 }
12054 }
12055
Frank Barcharde22685a2021-11-12 11:36:58 -080012056 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_small_kernel) {
Frank Barchard960ae342021-07-01 11:31:11 -070012057 TEST_REQUIRES_ARM_NEON;
12058 for (uint32_t n = 9; n < 16; n++) {
12059 for (size_t k = 1; k <= 80; k += 17) {
12060 GemmMicrokernelTester()
12061 .mr(1)
12062 .nr(8)
12063 .kr(8)
12064 .sr(1)
12065 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012066 .n(n)
Frank Barchard960ae342021-07-01 11:31:11 -070012067 .k(k)
12068 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012069 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -070012070 }
12071 }
12072 }
12073
Frank Barcharde22685a2021-11-12 11:36:58 -080012074 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_small_kernel) {
Frank Barchard960ae342021-07-01 11:31:11 -070012075 TEST_REQUIRES_ARM_NEON;
12076 for (uint32_t n = 16; n <= 24; n += 8) {
12077 for (size_t k = 1; k <= 80; k += 17) {
12078 GemmMicrokernelTester()
12079 .mr(1)
12080 .nr(8)
12081 .kr(8)
12082 .sr(1)
12083 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012084 .n(n)
Frank Barchard960ae342021-07-01 11:31:11 -070012085 .k(k)
12086 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012087 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -070012088 }
12089 }
12090 }
12091
Frank Barcharde22685a2021-11-12 11:36:58 -080012092 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -070012093 TEST_REQUIRES_ARM_NEON;
12094 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012095 for (uint32_t n = 1; n <= 8; n++) {
12096 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard960ae342021-07-01 11:31:11 -070012097 GemmMicrokernelTester()
12098 .mr(1)
12099 .nr(8)
12100 .kr(8)
12101 .sr(1)
12102 .m(m)
12103 .n(n)
12104 .k(k)
12105 .cm_stride(11)
12106 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012107 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -070012108 }
12109 }
12110 }
12111 }
12112
Frank Barcharde22685a2021-11-12 11:36:58 -080012113 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, a_offset) {
Frank Barchard960ae342021-07-01 11:31:11 -070012114 TEST_REQUIRES_ARM_NEON;
12115 for (size_t k = 1; k <= 80; k += 17) {
12116 GemmMicrokernelTester()
12117 .mr(1)
12118 .nr(8)
12119 .kr(8)
12120 .sr(1)
12121 .m(1)
12122 .n(8)
12123 .k(k)
12124 .ks(3)
12125 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080012126 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -070012127 }
12128 }
12129
Frank Barcharde22685a2021-11-12 11:36:58 -080012130 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, zero) {
Frank Barchard960ae342021-07-01 11:31:11 -070012131 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012132 for (size_t k = 1; k <= 80; k += 17) {
12133 for (uint32_t mz = 0; mz < 1; mz++) {
Frank Barchard960ae342021-07-01 11:31:11 -070012134 GemmMicrokernelTester()
12135 .mr(1)
12136 .nr(8)
12137 .kr(8)
12138 .sr(1)
12139 .m(1)
12140 .n(8)
12141 .k(k)
12142 .ks(3)
12143 .a_offset(83)
12144 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080012145 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -070012146 }
12147 }
12148 }
12149
Frank Barcharde22685a2021-11-12 11:36:58 -080012150 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmin) {
Frank Barchard960ae342021-07-01 11:31:11 -070012151 TEST_REQUIRES_ARM_NEON;
12152 GemmMicrokernelTester()
12153 .mr(1)
12154 .nr(8)
12155 .kr(8)
12156 .sr(1)
12157 .m(1)
12158 .n(8)
12159 .k(16)
12160 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012161 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -070012162 }
12163
Frank Barcharde22685a2021-11-12 11:36:58 -080012164 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmax) {
Frank Barchard960ae342021-07-01 11:31:11 -070012165 TEST_REQUIRES_ARM_NEON;
12166 GemmMicrokernelTester()
12167 .mr(1)
12168 .nr(8)
12169 .kr(8)
12170 .sr(1)
12171 .m(1)
12172 .n(8)
12173 .k(16)
12174 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012175 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -070012176 }
12177
Frank Barcharde22685a2021-11-12 11:36:58 -080012178 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm) {
Frank Barchard960ae342021-07-01 11:31:11 -070012179 TEST_REQUIRES_ARM_NEON;
12180 GemmMicrokernelTester()
12181 .mr(1)
12182 .nr(8)
12183 .kr(8)
12184 .sr(1)
12185 .m(1)
12186 .n(8)
12187 .k(16)
12188 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080012189 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -070012190 }
Frank Barcharde4d3f762021-12-23 15:31:43 -080012191#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard960ae342021-07-01 11:31:11 -070012192
12193
Frank Barcharde4d3f762021-12-23 15:31:43 -080012194#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard98af05c2021-06-30 12:15:04 -070012195 TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8) {
12196 TEST_REQUIRES_ARM_NEON;
12197 GemmMicrokernelTester()
12198 .mr(4)
12199 .nr(16)
12200 .kr(1)
12201 .sr(1)
12202 .m(4)
12203 .n(16)
12204 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080012205 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -070012206 }
12207
12208 TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cn) {
12209 TEST_REQUIRES_ARM_NEON;
12210 GemmMicrokernelTester()
12211 .mr(4)
12212 .nr(16)
12213 .kr(1)
12214 .sr(1)
12215 .m(4)
12216 .n(16)
12217 .k(8)
12218 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080012219 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -070012220 }
12221
12222 TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile) {
12223 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012224 for (uint32_t n = 1; n <= 16; n++) {
12225 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard98af05c2021-06-30 12:15:04 -070012226 GemmMicrokernelTester()
12227 .mr(4)
12228 .nr(16)
12229 .kr(1)
12230 .sr(1)
12231 .m(m)
12232 .n(n)
12233 .k(8)
12234 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012235 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -070012236 }
12237 }
12238 }
12239
12240 TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_m) {
12241 TEST_REQUIRES_ARM_NEON;
12242 for (uint32_t m = 1; m <= 4; m++) {
12243 GemmMicrokernelTester()
12244 .mr(4)
12245 .nr(16)
12246 .kr(1)
12247 .sr(1)
12248 .m(m)
12249 .n(16)
12250 .k(8)
12251 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012252 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -070012253 }
12254 }
12255
12256 TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_n) {
12257 TEST_REQUIRES_ARM_NEON;
12258 for (uint32_t n = 1; n <= 16; n++) {
12259 GemmMicrokernelTester()
12260 .mr(4)
12261 .nr(16)
12262 .kr(1)
12263 .sr(1)
12264 .m(4)
12265 .n(n)
12266 .k(8)
12267 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012268 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -070012269 }
12270 }
12271
12272 TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8) {
12273 TEST_REQUIRES_ARM_NEON;
12274 for (size_t k = 1; k < 8; k++) {
12275 GemmMicrokernelTester()
12276 .mr(4)
12277 .nr(16)
12278 .kr(1)
12279 .sr(1)
12280 .m(4)
12281 .n(16)
12282 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012283 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -070012284 }
12285 }
12286
12287 TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_subtile) {
12288 TEST_REQUIRES_ARM_NEON;
12289 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012290 for (uint32_t n = 1; n <= 16; n++) {
12291 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard98af05c2021-06-30 12:15:04 -070012292 GemmMicrokernelTester()
12293 .mr(4)
12294 .nr(16)
12295 .kr(1)
12296 .sr(1)
12297 .m(m)
12298 .n(n)
12299 .k(k)
12300 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012301 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -070012302 }
12303 }
12304 }
12305 }
12306
12307 TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8) {
12308 TEST_REQUIRES_ARM_NEON;
12309 for (size_t k = 9; k < 16; k++) {
12310 GemmMicrokernelTester()
12311 .mr(4)
12312 .nr(16)
12313 .kr(1)
12314 .sr(1)
12315 .m(4)
12316 .n(16)
12317 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012318 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -070012319 }
12320 }
12321
12322 TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_subtile) {
12323 TEST_REQUIRES_ARM_NEON;
12324 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012325 for (uint32_t n = 1; n <= 16; n++) {
12326 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard98af05c2021-06-30 12:15:04 -070012327 GemmMicrokernelTester()
12328 .mr(4)
12329 .nr(16)
12330 .kr(1)
12331 .sr(1)
12332 .m(m)
12333 .n(n)
12334 .k(k)
12335 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012336 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -070012337 }
12338 }
12339 }
12340 }
12341
12342 TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8) {
12343 TEST_REQUIRES_ARM_NEON;
12344 for (size_t k = 16; k <= 80; k += 8) {
12345 GemmMicrokernelTester()
12346 .mr(4)
12347 .nr(16)
12348 .kr(1)
12349 .sr(1)
12350 .m(4)
12351 .n(16)
12352 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012353 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -070012354 }
12355 }
12356
12357 TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_subtile) {
12358 TEST_REQUIRES_ARM_NEON;
12359 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012360 for (uint32_t n = 1; n <= 16; n++) {
12361 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard98af05c2021-06-30 12:15:04 -070012362 GemmMicrokernelTester()
12363 .mr(4)
12364 .nr(16)
12365 .kr(1)
12366 .sr(1)
12367 .m(m)
12368 .n(n)
12369 .k(k)
12370 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012371 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -070012372 }
12373 }
12374 }
12375 }
12376
12377 TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16) {
12378 TEST_REQUIRES_ARM_NEON;
12379 for (uint32_t n = 17; n < 32; n++) {
12380 for (size_t k = 1; k <= 40; k += 9) {
12381 GemmMicrokernelTester()
12382 .mr(4)
12383 .nr(16)
12384 .kr(1)
12385 .sr(1)
12386 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012387 .n(n)
Frank Barchard98af05c2021-06-30 12:15:04 -070012388 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012389 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -070012390 }
12391 }
12392 }
12393
12394 TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_strided_cn) {
12395 TEST_REQUIRES_ARM_NEON;
12396 for (uint32_t n = 17; n < 32; n++) {
12397 for (size_t k = 1; k <= 40; k += 9) {
12398 GemmMicrokernelTester()
12399 .mr(4)
12400 .nr(16)
12401 .kr(1)
12402 .sr(1)
12403 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012404 .n(n)
Frank Barchard98af05c2021-06-30 12:15:04 -070012405 .k(k)
12406 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080012407 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -070012408 }
12409 }
12410 }
12411
12412 TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_subtile) {
12413 TEST_REQUIRES_ARM_NEON;
12414 for (uint32_t n = 17; n < 32; n++) {
12415 for (size_t k = 1; k <= 40; k += 9) {
12416 for (uint32_t m = 1; m <= 4; m++) {
12417 GemmMicrokernelTester()
12418 .mr(4)
12419 .nr(16)
12420 .kr(1)
12421 .sr(1)
12422 .m(m)
12423 .n(n)
12424 .k(k)
12425 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012426 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -070012427 }
12428 }
12429 }
12430 }
12431
12432 TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16) {
12433 TEST_REQUIRES_ARM_NEON;
12434 for (uint32_t n = 32; n <= 48; n += 16) {
12435 for (size_t k = 1; k <= 40; k += 9) {
12436 GemmMicrokernelTester()
12437 .mr(4)
12438 .nr(16)
12439 .kr(1)
12440 .sr(1)
12441 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012442 .n(n)
Frank Barchard98af05c2021-06-30 12:15:04 -070012443 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012444 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -070012445 }
12446 }
12447 }
12448
12449 TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_strided_cn) {
12450 TEST_REQUIRES_ARM_NEON;
12451 for (uint32_t n = 32; n <= 48; n += 16) {
12452 for (size_t k = 1; k <= 40; k += 9) {
12453 GemmMicrokernelTester()
12454 .mr(4)
12455 .nr(16)
12456 .kr(1)
12457 .sr(1)
12458 .m(4)
12459 .n(n)
12460 .k(k)
12461 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080012462 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -070012463 }
12464 }
12465 }
12466
12467 TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_subtile) {
12468 TEST_REQUIRES_ARM_NEON;
12469 for (uint32_t n = 32; n <= 48; n += 16) {
12470 for (size_t k = 1; k <= 40; k += 9) {
12471 for (uint32_t m = 1; m <= 4; m++) {
12472 GemmMicrokernelTester()
12473 .mr(4)
12474 .nr(16)
12475 .kr(1)
12476 .sr(1)
12477 .m(m)
12478 .n(n)
12479 .k(k)
12480 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012481 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -070012482 }
12483 }
12484 }
12485 }
12486
12487 TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel) {
12488 TEST_REQUIRES_ARM_NEON;
12489 for (size_t k = 1; k <= 40; k += 9) {
12490 GemmMicrokernelTester()
12491 .mr(4)
12492 .nr(16)
12493 .kr(1)
12494 .sr(1)
12495 .m(4)
12496 .n(16)
12497 .k(k)
12498 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012499 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -070012500 }
12501 }
12502
12503 TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel_subtile) {
12504 TEST_REQUIRES_ARM_NEON;
12505 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012506 for (uint32_t n = 1; n <= 16; n++) {
12507 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard98af05c2021-06-30 12:15:04 -070012508 GemmMicrokernelTester()
12509 .mr(4)
12510 .nr(16)
12511 .kr(1)
12512 .sr(1)
12513 .m(m)
12514 .n(n)
12515 .k(k)
12516 .ks(3)
12517 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012518 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -070012519 }
12520 }
12521 }
12522 }
12523
12524 TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_small_kernel) {
12525 TEST_REQUIRES_ARM_NEON;
12526 for (uint32_t n = 17; n < 32; n++) {
12527 for (size_t k = 1; k <= 40; k += 9) {
12528 GemmMicrokernelTester()
12529 .mr(4)
12530 .nr(16)
12531 .kr(1)
12532 .sr(1)
12533 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012534 .n(n)
Frank Barchard98af05c2021-06-30 12:15:04 -070012535 .k(k)
12536 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012537 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -070012538 }
12539 }
12540 }
12541
12542 TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_small_kernel) {
12543 TEST_REQUIRES_ARM_NEON;
12544 for (uint32_t n = 32; n <= 48; n += 16) {
12545 for (size_t k = 1; k <= 40; k += 9) {
12546 GemmMicrokernelTester()
12547 .mr(4)
12548 .nr(16)
12549 .kr(1)
12550 .sr(1)
12551 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012552 .n(n)
Frank Barchard98af05c2021-06-30 12:15:04 -070012553 .k(k)
12554 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012555 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -070012556 }
12557 }
12558 }
12559
12560 TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm_subtile) {
12561 TEST_REQUIRES_ARM_NEON;
12562 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012563 for (uint32_t n = 1; n <= 16; n++) {
12564 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard98af05c2021-06-30 12:15:04 -070012565 GemmMicrokernelTester()
12566 .mr(4)
12567 .nr(16)
12568 .kr(1)
12569 .sr(1)
12570 .m(m)
12571 .n(n)
12572 .k(k)
12573 .cm_stride(19)
12574 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012575 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -070012576 }
12577 }
12578 }
12579 }
12580
12581 TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, a_offset) {
12582 TEST_REQUIRES_ARM_NEON;
12583 for (size_t k = 1; k <= 40; k += 9) {
12584 GemmMicrokernelTester()
12585 .mr(4)
12586 .nr(16)
12587 .kr(1)
12588 .sr(1)
12589 .m(4)
12590 .n(16)
12591 .k(k)
12592 .ks(3)
12593 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080012594 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -070012595 }
12596 }
12597
12598 TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, zero) {
12599 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012600 for (size_t k = 1; k <= 40; k += 9) {
12601 for (uint32_t mz = 0; mz < 4; mz++) {
Frank Barchard98af05c2021-06-30 12:15:04 -070012602 GemmMicrokernelTester()
12603 .mr(4)
12604 .nr(16)
12605 .kr(1)
12606 .sr(1)
12607 .m(4)
12608 .n(16)
12609 .k(k)
12610 .ks(3)
12611 .a_offset(163)
12612 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080012613 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -070012614 }
12615 }
12616 }
12617
12618 TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmin) {
12619 TEST_REQUIRES_ARM_NEON;
12620 GemmMicrokernelTester()
12621 .mr(4)
12622 .nr(16)
12623 .kr(1)
12624 .sr(1)
12625 .m(4)
12626 .n(16)
12627 .k(8)
12628 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012629 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -070012630 }
12631
12632 TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmax) {
12633 TEST_REQUIRES_ARM_NEON;
12634 GemmMicrokernelTester()
12635 .mr(4)
12636 .nr(16)
12637 .kr(1)
12638 .sr(1)
12639 .m(4)
12640 .n(16)
12641 .k(8)
12642 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012643 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -070012644 }
12645
12646 TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm) {
12647 TEST_REQUIRES_ARM_NEON;
12648 GemmMicrokernelTester()
12649 .mr(4)
12650 .nr(16)
12651 .kr(1)
12652 .sr(1)
12653 .m(4)
12654 .n(16)
12655 .k(8)
12656 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080012657 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -070012658 }
Frank Barcharde4d3f762021-12-23 15:31:43 -080012659#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard98af05c2021-06-30 12:15:04 -070012660
12661
Frank Barcharde4d3f762021-12-23 15:31:43 -080012662#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard1a0b2762021-06-29 18:37:59 -070012663 TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16) {
12664 TEST_REQUIRES_ARM_NEON_DOT;
12665 GemmMicrokernelTester()
12666 .mr(4)
12667 .nr(16)
12668 .kr(4)
12669 .sr(1)
12670 .m(4)
12671 .n(16)
12672 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080012673 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -070012674 }
12675
12676 TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cn) {
12677 TEST_REQUIRES_ARM_NEON_DOT;
12678 GemmMicrokernelTester()
12679 .mr(4)
12680 .nr(16)
12681 .kr(4)
12682 .sr(1)
12683 .m(4)
12684 .n(16)
12685 .k(16)
12686 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080012687 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -070012688 }
12689
12690 TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile) {
12691 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012692 for (uint32_t n = 1; n <= 16; n++) {
12693 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard1a0b2762021-06-29 18:37:59 -070012694 GemmMicrokernelTester()
12695 .mr(4)
12696 .nr(16)
12697 .kr(4)
12698 .sr(1)
12699 .m(m)
12700 .n(n)
12701 .k(16)
12702 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012703 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -070012704 }
12705 }
12706 }
12707
12708 TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_m) {
12709 TEST_REQUIRES_ARM_NEON_DOT;
12710 for (uint32_t m = 1; m <= 4; m++) {
12711 GemmMicrokernelTester()
12712 .mr(4)
12713 .nr(16)
12714 .kr(4)
12715 .sr(1)
12716 .m(m)
12717 .n(16)
12718 .k(16)
12719 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012720 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -070012721 }
12722 }
12723
12724 TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_n) {
12725 TEST_REQUIRES_ARM_NEON_DOT;
12726 for (uint32_t n = 1; n <= 16; n++) {
12727 GemmMicrokernelTester()
12728 .mr(4)
12729 .nr(16)
12730 .kr(4)
12731 .sr(1)
12732 .m(4)
12733 .n(n)
12734 .k(16)
12735 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012736 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -070012737 }
12738 }
12739
12740 TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16) {
12741 TEST_REQUIRES_ARM_NEON_DOT;
12742 for (size_t k = 1; k < 16; k++) {
12743 GemmMicrokernelTester()
12744 .mr(4)
12745 .nr(16)
12746 .kr(4)
12747 .sr(1)
12748 .m(4)
12749 .n(16)
12750 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012751 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -070012752 }
12753 }
12754
12755 TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16_subtile) {
12756 TEST_REQUIRES_ARM_NEON_DOT;
12757 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012758 for (uint32_t n = 1; n <= 16; n++) {
12759 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard1a0b2762021-06-29 18:37:59 -070012760 GemmMicrokernelTester()
12761 .mr(4)
12762 .nr(16)
12763 .kr(4)
12764 .sr(1)
12765 .m(m)
12766 .n(n)
12767 .k(k)
12768 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012769 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -070012770 }
12771 }
12772 }
12773 }
12774
12775 TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16) {
12776 TEST_REQUIRES_ARM_NEON_DOT;
12777 for (size_t k = 17; k < 32; k++) {
12778 GemmMicrokernelTester()
12779 .mr(4)
12780 .nr(16)
12781 .kr(4)
12782 .sr(1)
12783 .m(4)
12784 .n(16)
12785 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012786 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -070012787 }
12788 }
12789
12790 TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16_subtile) {
12791 TEST_REQUIRES_ARM_NEON_DOT;
12792 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012793 for (uint32_t n = 1; n <= 16; n++) {
12794 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard1a0b2762021-06-29 18:37:59 -070012795 GemmMicrokernelTester()
12796 .mr(4)
12797 .nr(16)
12798 .kr(4)
12799 .sr(1)
12800 .m(m)
12801 .n(n)
12802 .k(k)
12803 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012804 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -070012805 }
12806 }
12807 }
12808 }
12809
12810 TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16) {
12811 TEST_REQUIRES_ARM_NEON_DOT;
12812 for (size_t k = 32; k <= 160; k += 16) {
12813 GemmMicrokernelTester()
12814 .mr(4)
12815 .nr(16)
12816 .kr(4)
12817 .sr(1)
12818 .m(4)
12819 .n(16)
12820 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012821 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -070012822 }
12823 }
12824
12825 TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16_subtile) {
12826 TEST_REQUIRES_ARM_NEON_DOT;
12827 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012828 for (uint32_t n = 1; n <= 16; n++) {
12829 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard1a0b2762021-06-29 18:37:59 -070012830 GemmMicrokernelTester()
12831 .mr(4)
12832 .nr(16)
12833 .kr(4)
12834 .sr(1)
12835 .m(m)
12836 .n(n)
12837 .k(k)
12838 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012839 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -070012840 }
12841 }
12842 }
12843 }
12844
12845 TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16) {
12846 TEST_REQUIRES_ARM_NEON_DOT;
12847 for (uint32_t n = 17; n < 32; n++) {
12848 for (size_t k = 1; k <= 80; k += 17) {
12849 GemmMicrokernelTester()
12850 .mr(4)
12851 .nr(16)
12852 .kr(4)
12853 .sr(1)
12854 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012855 .n(n)
Frank Barchard1a0b2762021-06-29 18:37:59 -070012856 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012857 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -070012858 }
12859 }
12860 }
12861
12862 TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_strided_cn) {
12863 TEST_REQUIRES_ARM_NEON_DOT;
12864 for (uint32_t n = 17; n < 32; n++) {
12865 for (size_t k = 1; k <= 80; k += 17) {
12866 GemmMicrokernelTester()
12867 .mr(4)
12868 .nr(16)
12869 .kr(4)
12870 .sr(1)
12871 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012872 .n(n)
Frank Barchard1a0b2762021-06-29 18:37:59 -070012873 .k(k)
12874 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080012875 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -070012876 }
12877 }
12878 }
12879
12880 TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_subtile) {
12881 TEST_REQUIRES_ARM_NEON_DOT;
12882 for (uint32_t n = 17; n < 32; n++) {
12883 for (size_t k = 1; k <= 80; k += 17) {
12884 for (uint32_t m = 1; m <= 4; m++) {
12885 GemmMicrokernelTester()
12886 .mr(4)
12887 .nr(16)
12888 .kr(4)
12889 .sr(1)
12890 .m(m)
12891 .n(n)
12892 .k(k)
12893 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012894 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -070012895 }
12896 }
12897 }
12898 }
12899
12900 TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16) {
12901 TEST_REQUIRES_ARM_NEON_DOT;
12902 for (uint32_t n = 32; n <= 48; n += 16) {
12903 for (size_t k = 1; k <= 80; k += 17) {
12904 GemmMicrokernelTester()
12905 .mr(4)
12906 .nr(16)
12907 .kr(4)
12908 .sr(1)
12909 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012910 .n(n)
Frank Barchard1a0b2762021-06-29 18:37:59 -070012911 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012912 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -070012913 }
12914 }
12915 }
12916
12917 TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_strided_cn) {
12918 TEST_REQUIRES_ARM_NEON_DOT;
12919 for (uint32_t n = 32; n <= 48; n += 16) {
12920 for (size_t k = 1; k <= 80; k += 17) {
12921 GemmMicrokernelTester()
12922 .mr(4)
12923 .nr(16)
12924 .kr(4)
12925 .sr(1)
12926 .m(4)
12927 .n(n)
12928 .k(k)
12929 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080012930 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -070012931 }
12932 }
12933 }
12934
12935 TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_subtile) {
12936 TEST_REQUIRES_ARM_NEON_DOT;
12937 for (uint32_t n = 32; n <= 48; n += 16) {
12938 for (size_t k = 1; k <= 80; k += 17) {
12939 for (uint32_t m = 1; m <= 4; m++) {
12940 GemmMicrokernelTester()
12941 .mr(4)
12942 .nr(16)
12943 .kr(4)
12944 .sr(1)
12945 .m(m)
12946 .n(n)
12947 .k(k)
12948 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012949 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -070012950 }
12951 }
12952 }
12953 }
12954
12955 TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel) {
12956 TEST_REQUIRES_ARM_NEON_DOT;
12957 for (size_t k = 1; k <= 80; k += 17) {
12958 GemmMicrokernelTester()
12959 .mr(4)
12960 .nr(16)
12961 .kr(4)
12962 .sr(1)
12963 .m(4)
12964 .n(16)
12965 .k(k)
12966 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012967 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -070012968 }
12969 }
12970
12971 TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel_subtile) {
12972 TEST_REQUIRES_ARM_NEON_DOT;
12973 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012974 for (uint32_t n = 1; n <= 16; n++) {
12975 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard1a0b2762021-06-29 18:37:59 -070012976 GemmMicrokernelTester()
12977 .mr(4)
12978 .nr(16)
12979 .kr(4)
12980 .sr(1)
12981 .m(m)
12982 .n(n)
12983 .k(k)
12984 .ks(3)
12985 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012986 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -070012987 }
12988 }
12989 }
12990 }
12991
12992 TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_small_kernel) {
12993 TEST_REQUIRES_ARM_NEON_DOT;
12994 for (uint32_t n = 17; n < 32; n++) {
12995 for (size_t k = 1; k <= 80; k += 17) {
12996 GemmMicrokernelTester()
12997 .mr(4)
12998 .nr(16)
12999 .kr(4)
13000 .sr(1)
13001 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013002 .n(n)
Frank Barchard1a0b2762021-06-29 18:37:59 -070013003 .k(k)
13004 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013005 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -070013006 }
13007 }
13008 }
13009
13010 TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_small_kernel) {
13011 TEST_REQUIRES_ARM_NEON_DOT;
13012 for (uint32_t n = 32; n <= 48; n += 16) {
13013 for (size_t k = 1; k <= 80; k += 17) {
13014 GemmMicrokernelTester()
13015 .mr(4)
13016 .nr(16)
13017 .kr(4)
13018 .sr(1)
13019 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013020 .n(n)
Frank Barchard1a0b2762021-06-29 18:37:59 -070013021 .k(k)
13022 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013023 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -070013024 }
13025 }
13026 }
13027
13028 TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm_subtile) {
13029 TEST_REQUIRES_ARM_NEON_DOT;
13030 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013031 for (uint32_t n = 1; n <= 16; n++) {
13032 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard1a0b2762021-06-29 18:37:59 -070013033 GemmMicrokernelTester()
13034 .mr(4)
13035 .nr(16)
13036 .kr(4)
13037 .sr(1)
13038 .m(m)
13039 .n(n)
13040 .k(k)
13041 .cm_stride(19)
13042 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013043 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -070013044 }
13045 }
13046 }
13047 }
13048
13049 TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, a_offset) {
13050 TEST_REQUIRES_ARM_NEON_DOT;
13051 for (size_t k = 1; k <= 80; k += 17) {
13052 GemmMicrokernelTester()
13053 .mr(4)
13054 .nr(16)
13055 .kr(4)
13056 .sr(1)
13057 .m(4)
13058 .n(16)
13059 .k(k)
13060 .ks(3)
13061 .a_offset(331)
Marat Dukhan50323b82022-01-11 00:12:01 -080013062 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -070013063 }
13064 }
13065
13066 TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, zero) {
13067 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013068 for (size_t k = 1; k <= 80; k += 17) {
13069 for (uint32_t mz = 0; mz < 4; mz++) {
Frank Barchard1a0b2762021-06-29 18:37:59 -070013070 GemmMicrokernelTester()
13071 .mr(4)
13072 .nr(16)
13073 .kr(4)
13074 .sr(1)
13075 .m(4)
13076 .n(16)
13077 .k(k)
13078 .ks(3)
13079 .a_offset(331)
13080 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080013081 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -070013082 }
13083 }
13084 }
13085
13086 TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmin) {
13087 TEST_REQUIRES_ARM_NEON_DOT;
13088 GemmMicrokernelTester()
13089 .mr(4)
13090 .nr(16)
13091 .kr(4)
13092 .sr(1)
13093 .m(4)
13094 .n(16)
13095 .k(16)
13096 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013097 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -070013098 }
13099
13100 TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmax) {
13101 TEST_REQUIRES_ARM_NEON_DOT;
13102 GemmMicrokernelTester()
13103 .mr(4)
13104 .nr(16)
13105 .kr(4)
13106 .sr(1)
13107 .m(4)
13108 .n(16)
13109 .k(16)
13110 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013111 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -070013112 }
13113
13114 TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm) {
13115 TEST_REQUIRES_ARM_NEON_DOT;
13116 GemmMicrokernelTester()
13117 .mr(4)
13118 .nr(16)
13119 .kr(4)
13120 .sr(1)
13121 .m(4)
13122 .n(16)
13123 .k(16)
13124 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080013125 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -070013126 }
Frank Barcharde4d3f762021-12-23 15:31:43 -080013127#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard1a0b2762021-06-29 18:37:59 -070013128
13129
Marat Dukhancf055852021-06-26 09:05:09 -070013130#if XNN_ARCH_ARM || XNN_ARCH_ARM64
13131 TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8) {
13132 TEST_REQUIRES_ARM_NEON;
13133 GemmMicrokernelTester()
13134 .mr(4)
13135 .nr(16)
13136 .kr(1)
13137 .sr(1)
13138 .m(4)
13139 .n(16)
13140 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080013141 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013142 }
13143
13144 TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cn) {
13145 TEST_REQUIRES_ARM_NEON;
13146 GemmMicrokernelTester()
13147 .mr(4)
13148 .nr(16)
13149 .kr(1)
13150 .sr(1)
13151 .m(4)
13152 .n(16)
13153 .k(8)
13154 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080013155 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013156 }
13157
13158 TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile) {
13159 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013160 for (uint32_t n = 1; n <= 16; n++) {
13161 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhancf055852021-06-26 09:05:09 -070013162 GemmMicrokernelTester()
13163 .mr(4)
13164 .nr(16)
13165 .kr(1)
13166 .sr(1)
13167 .m(m)
13168 .n(n)
13169 .k(8)
13170 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013171 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013172 }
13173 }
13174 }
13175
13176 TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
13177 TEST_REQUIRES_ARM_NEON;
13178 for (uint32_t m = 1; m <= 4; m++) {
13179 GemmMicrokernelTester()
13180 .mr(4)
13181 .nr(16)
13182 .kr(1)
13183 .sr(1)
13184 .m(m)
13185 .n(16)
13186 .k(8)
13187 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013188 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013189 }
13190 }
13191
13192 TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
13193 TEST_REQUIRES_ARM_NEON;
13194 for (uint32_t n = 1; n <= 16; n++) {
13195 GemmMicrokernelTester()
13196 .mr(4)
13197 .nr(16)
13198 .kr(1)
13199 .sr(1)
13200 .m(4)
13201 .n(n)
13202 .k(8)
13203 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013204 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013205 }
13206 }
13207
13208 TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_lt_8) {
13209 TEST_REQUIRES_ARM_NEON;
13210 for (size_t k = 1; k < 8; k++) {
13211 GemmMicrokernelTester()
13212 .mr(4)
13213 .nr(16)
13214 .kr(1)
13215 .sr(1)
13216 .m(4)
13217 .n(16)
13218 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013219 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013220 }
13221 }
13222
13223 TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_lt_8_subtile) {
13224 TEST_REQUIRES_ARM_NEON;
13225 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013226 for (uint32_t n = 1; n <= 16; n++) {
13227 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhancf055852021-06-26 09:05:09 -070013228 GemmMicrokernelTester()
13229 .mr(4)
13230 .nr(16)
13231 .kr(1)
13232 .sr(1)
13233 .m(m)
13234 .n(n)
13235 .k(k)
13236 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013237 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013238 }
13239 }
13240 }
13241 }
13242
13243 TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_gt_8) {
13244 TEST_REQUIRES_ARM_NEON;
13245 for (size_t k = 9; k < 16; k++) {
13246 GemmMicrokernelTester()
13247 .mr(4)
13248 .nr(16)
13249 .kr(1)
13250 .sr(1)
13251 .m(4)
13252 .n(16)
13253 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013254 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013255 }
13256 }
13257
13258 TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_gt_8_subtile) {
13259 TEST_REQUIRES_ARM_NEON;
13260 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013261 for (uint32_t n = 1; n <= 16; n++) {
13262 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhancf055852021-06-26 09:05:09 -070013263 GemmMicrokernelTester()
13264 .mr(4)
13265 .nr(16)
13266 .kr(1)
13267 .sr(1)
13268 .m(m)
13269 .n(n)
13270 .k(k)
13271 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013272 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013273 }
13274 }
13275 }
13276 }
13277
13278 TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_div_8) {
13279 TEST_REQUIRES_ARM_NEON;
13280 for (size_t k = 16; k <= 80; k += 8) {
13281 GemmMicrokernelTester()
13282 .mr(4)
13283 .nr(16)
13284 .kr(1)
13285 .sr(1)
13286 .m(4)
13287 .n(16)
13288 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013289 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013290 }
13291 }
13292
13293 TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_div_8_subtile) {
13294 TEST_REQUIRES_ARM_NEON;
13295 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013296 for (uint32_t n = 1; n <= 16; n++) {
13297 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhancf055852021-06-26 09:05:09 -070013298 GemmMicrokernelTester()
13299 .mr(4)
13300 .nr(16)
13301 .kr(1)
13302 .sr(1)
13303 .m(m)
13304 .n(n)
13305 .k(k)
13306 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013307 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013308 }
13309 }
13310 }
13311 }
13312
13313 TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16) {
13314 TEST_REQUIRES_ARM_NEON;
13315 for (uint32_t n = 17; n < 32; n++) {
13316 for (size_t k = 1; k <= 40; k += 9) {
13317 GemmMicrokernelTester()
13318 .mr(4)
13319 .nr(16)
13320 .kr(1)
13321 .sr(1)
13322 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013323 .n(n)
Marat Dukhancf055852021-06-26 09:05:09 -070013324 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013325 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013326 }
13327 }
13328 }
13329
13330 TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
13331 TEST_REQUIRES_ARM_NEON;
13332 for (uint32_t n = 17; n < 32; n++) {
13333 for (size_t k = 1; k <= 40; k += 9) {
13334 GemmMicrokernelTester()
13335 .mr(4)
13336 .nr(16)
13337 .kr(1)
13338 .sr(1)
13339 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013340 .n(n)
Marat Dukhancf055852021-06-26 09:05:09 -070013341 .k(k)
13342 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080013343 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013344 }
13345 }
13346 }
13347
13348 TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_subtile) {
13349 TEST_REQUIRES_ARM_NEON;
13350 for (uint32_t n = 17; n < 32; n++) {
13351 for (size_t k = 1; k <= 40; k += 9) {
13352 for (uint32_t m = 1; m <= 4; m++) {
13353 GemmMicrokernelTester()
13354 .mr(4)
13355 .nr(16)
13356 .kr(1)
13357 .sr(1)
13358 .m(m)
13359 .n(n)
13360 .k(k)
13361 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013362 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013363 }
13364 }
13365 }
13366 }
13367
13368 TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16) {
13369 TEST_REQUIRES_ARM_NEON;
13370 for (uint32_t n = 32; n <= 48; n += 16) {
13371 for (size_t k = 1; k <= 40; k += 9) {
13372 GemmMicrokernelTester()
13373 .mr(4)
13374 .nr(16)
13375 .kr(1)
13376 .sr(1)
13377 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013378 .n(n)
Marat Dukhancf055852021-06-26 09:05:09 -070013379 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013380 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013381 }
13382 }
13383 }
13384
13385 TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
13386 TEST_REQUIRES_ARM_NEON;
13387 for (uint32_t n = 32; n <= 48; n += 16) {
13388 for (size_t k = 1; k <= 40; k += 9) {
13389 GemmMicrokernelTester()
13390 .mr(4)
13391 .nr(16)
13392 .kr(1)
13393 .sr(1)
13394 .m(4)
13395 .n(n)
13396 .k(k)
13397 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080013398 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013399 }
13400 }
13401 }
13402
13403 TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_subtile) {
13404 TEST_REQUIRES_ARM_NEON;
13405 for (uint32_t n = 32; n <= 48; n += 16) {
13406 for (size_t k = 1; k <= 40; k += 9) {
13407 for (uint32_t m = 1; m <= 4; m++) {
13408 GemmMicrokernelTester()
13409 .mr(4)
13410 .nr(16)
13411 .kr(1)
13412 .sr(1)
13413 .m(m)
13414 .n(n)
13415 .k(k)
13416 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013417 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013418 }
13419 }
13420 }
13421 }
13422
13423 TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, small_kernel) {
13424 TEST_REQUIRES_ARM_NEON;
13425 for (size_t k = 1; k <= 40; k += 9) {
13426 GemmMicrokernelTester()
13427 .mr(4)
13428 .nr(16)
13429 .kr(1)
13430 .sr(1)
13431 .m(4)
13432 .n(16)
13433 .k(k)
13434 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013435 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013436 }
13437 }
13438
13439 TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, small_kernel_subtile) {
13440 TEST_REQUIRES_ARM_NEON;
13441 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013442 for (uint32_t n = 1; n <= 16; n++) {
13443 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhancf055852021-06-26 09:05:09 -070013444 GemmMicrokernelTester()
13445 .mr(4)
13446 .nr(16)
13447 .kr(1)
13448 .sr(1)
13449 .m(m)
13450 .n(n)
13451 .k(k)
13452 .ks(3)
13453 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013454 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013455 }
13456 }
13457 }
13458 }
13459
13460 TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
13461 TEST_REQUIRES_ARM_NEON;
13462 for (uint32_t n = 17; n < 32; n++) {
13463 for (size_t k = 1; k <= 40; k += 9) {
13464 GemmMicrokernelTester()
13465 .mr(4)
13466 .nr(16)
13467 .kr(1)
13468 .sr(1)
13469 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013470 .n(n)
Marat Dukhancf055852021-06-26 09:05:09 -070013471 .k(k)
13472 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013473 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013474 }
13475 }
13476 }
13477
13478 TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
13479 TEST_REQUIRES_ARM_NEON;
13480 for (uint32_t n = 32; n <= 48; n += 16) {
13481 for (size_t k = 1; k <= 40; k += 9) {
13482 GemmMicrokernelTester()
13483 .mr(4)
13484 .nr(16)
13485 .kr(1)
13486 .sr(1)
13487 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013488 .n(n)
Marat Dukhancf055852021-06-26 09:05:09 -070013489 .k(k)
13490 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013491 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013492 }
13493 }
13494 }
13495
13496 TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cm_subtile) {
13497 TEST_REQUIRES_ARM_NEON;
13498 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013499 for (uint32_t n = 1; n <= 16; n++) {
13500 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhancf055852021-06-26 09:05:09 -070013501 GemmMicrokernelTester()
13502 .mr(4)
13503 .nr(16)
13504 .kr(1)
13505 .sr(1)
13506 .m(m)
13507 .n(n)
13508 .k(k)
13509 .cm_stride(19)
13510 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013511 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013512 }
13513 }
13514 }
13515 }
13516
13517 TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, a_offset) {
13518 TEST_REQUIRES_ARM_NEON;
13519 for (size_t k = 1; k <= 40; k += 9) {
13520 GemmMicrokernelTester()
13521 .mr(4)
13522 .nr(16)
13523 .kr(1)
13524 .sr(1)
13525 .m(4)
13526 .n(16)
13527 .k(k)
13528 .ks(3)
13529 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080013530 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013531 }
13532 }
13533
13534 TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, zero) {
13535 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013536 for (size_t k = 1; k <= 40; k += 9) {
13537 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhancf055852021-06-26 09:05:09 -070013538 GemmMicrokernelTester()
13539 .mr(4)
13540 .nr(16)
13541 .kr(1)
13542 .sr(1)
13543 .m(4)
13544 .n(16)
13545 .k(k)
13546 .ks(3)
13547 .a_offset(163)
13548 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080013549 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013550 }
13551 }
13552 }
13553
13554 TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, qmin) {
13555 TEST_REQUIRES_ARM_NEON;
13556 GemmMicrokernelTester()
13557 .mr(4)
13558 .nr(16)
13559 .kr(1)
13560 .sr(1)
13561 .m(4)
13562 .n(16)
13563 .k(8)
13564 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013565 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013566 }
13567
13568 TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, qmax) {
13569 TEST_REQUIRES_ARM_NEON;
13570 GemmMicrokernelTester()
13571 .mr(4)
13572 .nr(16)
13573 .kr(1)
13574 .sr(1)
13575 .m(4)
13576 .n(16)
13577 .k(8)
13578 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013579 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013580 }
13581
13582 TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cm) {
13583 TEST_REQUIRES_ARM_NEON;
13584 GemmMicrokernelTester()
13585 .mr(4)
13586 .nr(16)
13587 .kr(1)
13588 .sr(1)
13589 .m(4)
13590 .n(16)
13591 .k(8)
13592 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080013593 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013594 }
13595#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
13596
13597
13598#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharde22685a2021-11-12 11:36:58 -080013599 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_eq_16) {
Marat Dukhancf055852021-06-26 09:05:09 -070013600 TEST_REQUIRES_ARM_NEON_V8;
13601 GemmMicrokernelTester()
13602 .mr(1)
13603 .nr(8)
13604 .kr(8)
13605 .sr(1)
13606 .m(1)
13607 .n(8)
13608 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080013609 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013610 }
13611
Frank Barcharde22685a2021-11-12 11:36:58 -080013612 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, strided_cn) {
Marat Dukhancf055852021-06-26 09:05:09 -070013613 TEST_REQUIRES_ARM_NEON_V8;
13614 GemmMicrokernelTester()
13615 .mr(1)
13616 .nr(8)
13617 .kr(8)
13618 .sr(1)
13619 .m(1)
13620 .n(8)
13621 .k(16)
13622 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080013623 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013624 }
13625
Frank Barcharde22685a2021-11-12 11:36:58 -080013626 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_eq_16_subtile) {
Marat Dukhancf055852021-06-26 09:05:09 -070013627 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013628 for (uint32_t n = 1; n <= 8; n++) {
13629 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhancf055852021-06-26 09:05:09 -070013630 GemmMicrokernelTester()
13631 .mr(1)
13632 .nr(8)
13633 .kr(8)
13634 .sr(1)
13635 .m(m)
13636 .n(n)
13637 .k(16)
13638 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013639 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013640 }
13641 }
13642 }
13643
Frank Barcharde22685a2021-11-12 11:36:58 -080013644 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_eq_16_subtile_m) {
Marat Dukhancf055852021-06-26 09:05:09 -070013645 TEST_REQUIRES_ARM_NEON_V8;
13646 for (uint32_t m = 1; m <= 1; m++) {
13647 GemmMicrokernelTester()
13648 .mr(1)
13649 .nr(8)
13650 .kr(8)
13651 .sr(1)
13652 .m(m)
13653 .n(8)
13654 .k(16)
13655 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013656 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013657 }
13658 }
13659
Frank Barcharde22685a2021-11-12 11:36:58 -080013660 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_eq_16_subtile_n) {
Marat Dukhancf055852021-06-26 09:05:09 -070013661 TEST_REQUIRES_ARM_NEON_V8;
13662 for (uint32_t n = 1; n <= 8; n++) {
13663 GemmMicrokernelTester()
13664 .mr(1)
13665 .nr(8)
13666 .kr(8)
13667 .sr(1)
13668 .m(1)
13669 .n(n)
13670 .k(16)
13671 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013672 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013673 }
13674 }
13675
Frank Barcharde22685a2021-11-12 11:36:58 -080013676 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_lt_16) {
Marat Dukhancf055852021-06-26 09:05:09 -070013677 TEST_REQUIRES_ARM_NEON_V8;
13678 for (size_t k = 1; k < 16; k++) {
13679 GemmMicrokernelTester()
13680 .mr(1)
13681 .nr(8)
13682 .kr(8)
13683 .sr(1)
13684 .m(1)
13685 .n(8)
13686 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013687 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013688 }
13689 }
13690
Frank Barcharde22685a2021-11-12 11:36:58 -080013691 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_lt_16_subtile) {
Marat Dukhancf055852021-06-26 09:05:09 -070013692 TEST_REQUIRES_ARM_NEON_V8;
13693 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013694 for (uint32_t n = 1; n <= 8; n++) {
13695 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhancf055852021-06-26 09:05:09 -070013696 GemmMicrokernelTester()
13697 .mr(1)
13698 .nr(8)
13699 .kr(8)
13700 .sr(1)
13701 .m(m)
13702 .n(n)
13703 .k(k)
13704 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013705 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013706 }
13707 }
13708 }
13709 }
13710
Frank Barcharde22685a2021-11-12 11:36:58 -080013711 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_gt_16) {
Marat Dukhancf055852021-06-26 09:05:09 -070013712 TEST_REQUIRES_ARM_NEON_V8;
13713 for (size_t k = 17; k < 32; k++) {
13714 GemmMicrokernelTester()
13715 .mr(1)
13716 .nr(8)
13717 .kr(8)
13718 .sr(1)
13719 .m(1)
13720 .n(8)
13721 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013722 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013723 }
13724 }
13725
Frank Barcharde22685a2021-11-12 11:36:58 -080013726 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_gt_16_subtile) {
Marat Dukhancf055852021-06-26 09:05:09 -070013727 TEST_REQUIRES_ARM_NEON_V8;
13728 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013729 for (uint32_t n = 1; n <= 8; n++) {
13730 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhancf055852021-06-26 09:05:09 -070013731 GemmMicrokernelTester()
13732 .mr(1)
13733 .nr(8)
13734 .kr(8)
13735 .sr(1)
13736 .m(m)
13737 .n(n)
13738 .k(k)
13739 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013740 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013741 }
13742 }
13743 }
13744 }
13745
Frank Barcharde22685a2021-11-12 11:36:58 -080013746 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_div_16) {
Marat Dukhancf055852021-06-26 09:05:09 -070013747 TEST_REQUIRES_ARM_NEON_V8;
13748 for (size_t k = 32; k <= 160; k += 16) {
13749 GemmMicrokernelTester()
13750 .mr(1)
13751 .nr(8)
13752 .kr(8)
13753 .sr(1)
13754 .m(1)
13755 .n(8)
13756 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013757 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013758 }
13759 }
13760
Frank Barcharde22685a2021-11-12 11:36:58 -080013761 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_div_16_subtile) {
Marat Dukhancf055852021-06-26 09:05:09 -070013762 TEST_REQUIRES_ARM_NEON_V8;
13763 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013764 for (uint32_t n = 1; n <= 8; n++) {
13765 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhancf055852021-06-26 09:05:09 -070013766 GemmMicrokernelTester()
13767 .mr(1)
13768 .nr(8)
13769 .kr(8)
13770 .sr(1)
13771 .m(m)
13772 .n(n)
13773 .k(k)
13774 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013775 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013776 }
13777 }
13778 }
13779 }
13780
Frank Barcharde22685a2021-11-12 11:36:58 -080013781 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_gt_8) {
Marat Dukhancf055852021-06-26 09:05:09 -070013782 TEST_REQUIRES_ARM_NEON_V8;
13783 for (uint32_t n = 9; n < 16; n++) {
13784 for (size_t k = 1; k <= 80; k += 17) {
13785 GemmMicrokernelTester()
13786 .mr(1)
13787 .nr(8)
13788 .kr(8)
13789 .sr(1)
13790 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013791 .n(n)
Marat Dukhancf055852021-06-26 09:05:09 -070013792 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013793 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013794 }
13795 }
13796 }
13797
Frank Barcharde22685a2021-11-12 11:36:58 -080013798 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_gt_8_strided_cn) {
Marat Dukhancf055852021-06-26 09:05:09 -070013799 TEST_REQUIRES_ARM_NEON_V8;
13800 for (uint32_t n = 9; n < 16; n++) {
13801 for (size_t k = 1; k <= 80; k += 17) {
13802 GemmMicrokernelTester()
13803 .mr(1)
13804 .nr(8)
13805 .kr(8)
13806 .sr(1)
13807 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013808 .n(n)
Marat Dukhancf055852021-06-26 09:05:09 -070013809 .k(k)
13810 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080013811 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013812 }
13813 }
13814 }
13815
Frank Barcharde22685a2021-11-12 11:36:58 -080013816 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_gt_8_subtile) {
Marat Dukhancf055852021-06-26 09:05:09 -070013817 TEST_REQUIRES_ARM_NEON_V8;
13818 for (uint32_t n = 9; n < 16; n++) {
13819 for (size_t k = 1; k <= 80; k += 17) {
13820 for (uint32_t m = 1; m <= 1; m++) {
13821 GemmMicrokernelTester()
13822 .mr(1)
13823 .nr(8)
13824 .kr(8)
13825 .sr(1)
13826 .m(m)
13827 .n(n)
13828 .k(k)
13829 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013830 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013831 }
13832 }
13833 }
13834 }
13835
Frank Barcharde22685a2021-11-12 11:36:58 -080013836 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_div_8) {
Marat Dukhancf055852021-06-26 09:05:09 -070013837 TEST_REQUIRES_ARM_NEON_V8;
13838 for (uint32_t n = 16; n <= 24; n += 8) {
13839 for (size_t k = 1; k <= 80; k += 17) {
13840 GemmMicrokernelTester()
13841 .mr(1)
13842 .nr(8)
13843 .kr(8)
13844 .sr(1)
13845 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013846 .n(n)
Marat Dukhancf055852021-06-26 09:05:09 -070013847 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013848 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013849 }
13850 }
13851 }
13852
Frank Barcharde22685a2021-11-12 11:36:58 -080013853 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_div_8_strided_cn) {
Marat Dukhancf055852021-06-26 09:05:09 -070013854 TEST_REQUIRES_ARM_NEON_V8;
13855 for (uint32_t n = 16; n <= 24; n += 8) {
13856 for (size_t k = 1; k <= 80; k += 17) {
13857 GemmMicrokernelTester()
13858 .mr(1)
13859 .nr(8)
13860 .kr(8)
13861 .sr(1)
13862 .m(1)
13863 .n(n)
13864 .k(k)
13865 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080013866 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013867 }
13868 }
13869 }
13870
Frank Barcharde22685a2021-11-12 11:36:58 -080013871 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_div_8_subtile) {
Marat Dukhancf055852021-06-26 09:05:09 -070013872 TEST_REQUIRES_ARM_NEON_V8;
13873 for (uint32_t n = 16; n <= 24; n += 8) {
13874 for (size_t k = 1; k <= 80; k += 17) {
13875 for (uint32_t m = 1; m <= 1; m++) {
13876 GemmMicrokernelTester()
13877 .mr(1)
13878 .nr(8)
13879 .kr(8)
13880 .sr(1)
13881 .m(m)
13882 .n(n)
13883 .k(k)
13884 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013885 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013886 }
13887 }
13888 }
13889 }
13890
Frank Barcharde22685a2021-11-12 11:36:58 -080013891 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, small_kernel) {
Marat Dukhancf055852021-06-26 09:05:09 -070013892 TEST_REQUIRES_ARM_NEON_V8;
13893 for (size_t k = 1; k <= 80; k += 17) {
13894 GemmMicrokernelTester()
13895 .mr(1)
13896 .nr(8)
13897 .kr(8)
13898 .sr(1)
13899 .m(1)
13900 .n(8)
13901 .k(k)
13902 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013903 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013904 }
13905 }
13906
Frank Barcharde22685a2021-11-12 11:36:58 -080013907 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, small_kernel_subtile) {
Marat Dukhancf055852021-06-26 09:05:09 -070013908 TEST_REQUIRES_ARM_NEON_V8;
13909 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013910 for (uint32_t n = 1; n <= 8; n++) {
13911 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhancf055852021-06-26 09:05:09 -070013912 GemmMicrokernelTester()
13913 .mr(1)
13914 .nr(8)
13915 .kr(8)
13916 .sr(1)
13917 .m(m)
13918 .n(n)
13919 .k(k)
13920 .ks(3)
13921 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013922 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013923 }
13924 }
13925 }
13926 }
13927
Frank Barcharde22685a2021-11-12 11:36:58 -080013928 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_gt_8_small_kernel) {
Marat Dukhancf055852021-06-26 09:05:09 -070013929 TEST_REQUIRES_ARM_NEON_V8;
13930 for (uint32_t n = 9; n < 16; n++) {
13931 for (size_t k = 1; k <= 80; k += 17) {
13932 GemmMicrokernelTester()
13933 .mr(1)
13934 .nr(8)
13935 .kr(8)
13936 .sr(1)
13937 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013938 .n(n)
Marat Dukhancf055852021-06-26 09:05:09 -070013939 .k(k)
13940 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013941 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013942 }
13943 }
13944 }
13945
Frank Barcharde22685a2021-11-12 11:36:58 -080013946 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_div_8_small_kernel) {
Marat Dukhancf055852021-06-26 09:05:09 -070013947 TEST_REQUIRES_ARM_NEON_V8;
13948 for (uint32_t n = 16; n <= 24; n += 8) {
13949 for (size_t k = 1; k <= 80; k += 17) {
13950 GemmMicrokernelTester()
13951 .mr(1)
13952 .nr(8)
13953 .kr(8)
13954 .sr(1)
13955 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013956 .n(n)
Marat Dukhancf055852021-06-26 09:05:09 -070013957 .k(k)
13958 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013959 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013960 }
13961 }
13962 }
13963
Frank Barcharde22685a2021-11-12 11:36:58 -080013964 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, strided_cm_subtile) {
Marat Dukhancf055852021-06-26 09:05:09 -070013965 TEST_REQUIRES_ARM_NEON_V8;
13966 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013967 for (uint32_t n = 1; n <= 8; n++) {
13968 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhancf055852021-06-26 09:05:09 -070013969 GemmMicrokernelTester()
13970 .mr(1)
13971 .nr(8)
13972 .kr(8)
13973 .sr(1)
13974 .m(m)
13975 .n(n)
13976 .k(k)
13977 .cm_stride(11)
13978 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013979 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013980 }
13981 }
13982 }
13983 }
13984
Frank Barcharde22685a2021-11-12 11:36:58 -080013985 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, a_offset) {
Marat Dukhancf055852021-06-26 09:05:09 -070013986 TEST_REQUIRES_ARM_NEON_V8;
13987 for (size_t k = 1; k <= 80; k += 17) {
13988 GemmMicrokernelTester()
13989 .mr(1)
13990 .nr(8)
13991 .kr(8)
13992 .sr(1)
13993 .m(1)
13994 .n(8)
13995 .k(k)
13996 .ks(3)
13997 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080013998 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070013999 }
14000 }
14001
Frank Barcharde22685a2021-11-12 11:36:58 -080014002 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, zero) {
Marat Dukhancf055852021-06-26 09:05:09 -070014003 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014004 for (size_t k = 1; k <= 80; k += 17) {
14005 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhancf055852021-06-26 09:05:09 -070014006 GemmMicrokernelTester()
14007 .mr(1)
14008 .nr(8)
14009 .kr(8)
14010 .sr(1)
14011 .m(1)
14012 .n(8)
14013 .k(k)
14014 .ks(3)
14015 .a_offset(83)
14016 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080014017 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014018 }
14019 }
14020 }
14021
Frank Barcharde22685a2021-11-12 11:36:58 -080014022 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, qmin) {
Marat Dukhancf055852021-06-26 09:05:09 -070014023 TEST_REQUIRES_ARM_NEON_V8;
14024 GemmMicrokernelTester()
14025 .mr(1)
14026 .nr(8)
14027 .kr(8)
14028 .sr(1)
14029 .m(1)
14030 .n(8)
14031 .k(16)
14032 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014033 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014034 }
14035
Frank Barcharde22685a2021-11-12 11:36:58 -080014036 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, qmax) {
Marat Dukhancf055852021-06-26 09:05:09 -070014037 TEST_REQUIRES_ARM_NEON_V8;
14038 GemmMicrokernelTester()
14039 .mr(1)
14040 .nr(8)
14041 .kr(8)
14042 .sr(1)
14043 .m(1)
14044 .n(8)
14045 .k(16)
14046 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014047 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014048 }
14049
Frank Barcharde22685a2021-11-12 11:36:58 -080014050 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, strided_cm) {
Marat Dukhancf055852021-06-26 09:05:09 -070014051 TEST_REQUIRES_ARM_NEON_V8;
14052 GemmMicrokernelTester()
14053 .mr(1)
14054 .nr(8)
14055 .kr(8)
14056 .sr(1)
14057 .m(1)
14058 .n(8)
14059 .k(16)
14060 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014061 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014062 }
14063#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14064
14065
14066#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharde22685a2021-11-12 11:36:58 -080014067 TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_eq_16) {
Marat Dukhancf055852021-06-26 09:05:09 -070014068 TEST_REQUIRES_ARM_NEON_V8;
14069 GemmMicrokernelTester()
14070 .mr(2)
14071 .nr(8)
14072 .kr(8)
14073 .sr(1)
14074 .m(2)
14075 .n(8)
14076 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080014077 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014078 }
14079
Frank Barcharde22685a2021-11-12 11:36:58 -080014080 TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, strided_cn) {
Marat Dukhancf055852021-06-26 09:05:09 -070014081 TEST_REQUIRES_ARM_NEON_V8;
14082 GemmMicrokernelTester()
14083 .mr(2)
14084 .nr(8)
14085 .kr(8)
14086 .sr(1)
14087 .m(2)
14088 .n(8)
14089 .k(16)
14090 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014091 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014092 }
14093
Frank Barcharde22685a2021-11-12 11:36:58 -080014094 TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_eq_16_subtile) {
Marat Dukhancf055852021-06-26 09:05:09 -070014095 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014096 for (uint32_t n = 1; n <= 8; n++) {
14097 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhancf055852021-06-26 09:05:09 -070014098 GemmMicrokernelTester()
14099 .mr(2)
14100 .nr(8)
14101 .kr(8)
14102 .sr(1)
14103 .m(m)
14104 .n(n)
14105 .k(16)
14106 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014107 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014108 }
14109 }
14110 }
14111
Frank Barcharde22685a2021-11-12 11:36:58 -080014112 TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_eq_16_subtile_m) {
Marat Dukhancf055852021-06-26 09:05:09 -070014113 TEST_REQUIRES_ARM_NEON_V8;
14114 for (uint32_t m = 1; m <= 2; m++) {
14115 GemmMicrokernelTester()
14116 .mr(2)
14117 .nr(8)
14118 .kr(8)
14119 .sr(1)
14120 .m(m)
14121 .n(8)
14122 .k(16)
14123 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014124 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014125 }
14126 }
14127
Frank Barcharde22685a2021-11-12 11:36:58 -080014128 TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_eq_16_subtile_n) {
Marat Dukhancf055852021-06-26 09:05:09 -070014129 TEST_REQUIRES_ARM_NEON_V8;
14130 for (uint32_t n = 1; n <= 8; n++) {
14131 GemmMicrokernelTester()
14132 .mr(2)
14133 .nr(8)
14134 .kr(8)
14135 .sr(1)
14136 .m(2)
14137 .n(n)
14138 .k(16)
14139 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014140 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014141 }
14142 }
14143
Frank Barcharde22685a2021-11-12 11:36:58 -080014144 TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_lt_16) {
Marat Dukhancf055852021-06-26 09:05:09 -070014145 TEST_REQUIRES_ARM_NEON_V8;
14146 for (size_t k = 1; k < 16; k++) {
14147 GemmMicrokernelTester()
14148 .mr(2)
14149 .nr(8)
14150 .kr(8)
14151 .sr(1)
14152 .m(2)
14153 .n(8)
14154 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014155 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014156 }
14157 }
14158
Frank Barcharde22685a2021-11-12 11:36:58 -080014159 TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_lt_16_subtile) {
Marat Dukhancf055852021-06-26 09:05:09 -070014160 TEST_REQUIRES_ARM_NEON_V8;
14161 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014162 for (uint32_t n = 1; n <= 8; n++) {
14163 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhancf055852021-06-26 09:05:09 -070014164 GemmMicrokernelTester()
14165 .mr(2)
14166 .nr(8)
14167 .kr(8)
14168 .sr(1)
14169 .m(m)
14170 .n(n)
14171 .k(k)
14172 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014173 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014174 }
14175 }
14176 }
14177 }
14178
Frank Barcharde22685a2021-11-12 11:36:58 -080014179 TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_gt_16) {
Marat Dukhancf055852021-06-26 09:05:09 -070014180 TEST_REQUIRES_ARM_NEON_V8;
14181 for (size_t k = 17; k < 32; k++) {
14182 GemmMicrokernelTester()
14183 .mr(2)
14184 .nr(8)
14185 .kr(8)
14186 .sr(1)
14187 .m(2)
14188 .n(8)
14189 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014190 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014191 }
14192 }
14193
Frank Barcharde22685a2021-11-12 11:36:58 -080014194 TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_gt_16_subtile) {
Marat Dukhancf055852021-06-26 09:05:09 -070014195 TEST_REQUIRES_ARM_NEON_V8;
14196 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014197 for (uint32_t n = 1; n <= 8; n++) {
14198 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhancf055852021-06-26 09:05:09 -070014199 GemmMicrokernelTester()
14200 .mr(2)
14201 .nr(8)
14202 .kr(8)
14203 .sr(1)
14204 .m(m)
14205 .n(n)
14206 .k(k)
14207 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014208 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014209 }
14210 }
14211 }
14212 }
14213
Frank Barcharde22685a2021-11-12 11:36:58 -080014214 TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_div_16) {
Marat Dukhancf055852021-06-26 09:05:09 -070014215 TEST_REQUIRES_ARM_NEON_V8;
14216 for (size_t k = 32; k <= 160; k += 16) {
14217 GemmMicrokernelTester()
14218 .mr(2)
14219 .nr(8)
14220 .kr(8)
14221 .sr(1)
14222 .m(2)
14223 .n(8)
14224 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014225 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014226 }
14227 }
14228
Frank Barcharde22685a2021-11-12 11:36:58 -080014229 TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_div_16_subtile) {
Marat Dukhancf055852021-06-26 09:05:09 -070014230 TEST_REQUIRES_ARM_NEON_V8;
14231 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014232 for (uint32_t n = 1; n <= 8; n++) {
14233 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhancf055852021-06-26 09:05:09 -070014234 GemmMicrokernelTester()
14235 .mr(2)
14236 .nr(8)
14237 .kr(8)
14238 .sr(1)
14239 .m(m)
14240 .n(n)
14241 .k(k)
14242 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014243 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014244 }
14245 }
14246 }
14247 }
14248
Frank Barcharde22685a2021-11-12 11:36:58 -080014249 TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_gt_8) {
Marat Dukhancf055852021-06-26 09:05:09 -070014250 TEST_REQUIRES_ARM_NEON_V8;
14251 for (uint32_t n = 9; n < 16; n++) {
14252 for (size_t k = 1; k <= 80; k += 17) {
14253 GemmMicrokernelTester()
14254 .mr(2)
14255 .nr(8)
14256 .kr(8)
14257 .sr(1)
14258 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014259 .n(n)
Marat Dukhancf055852021-06-26 09:05:09 -070014260 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014261 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014262 }
14263 }
14264 }
14265
Frank Barcharde22685a2021-11-12 11:36:58 -080014266 TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_gt_8_strided_cn) {
Marat Dukhancf055852021-06-26 09:05:09 -070014267 TEST_REQUIRES_ARM_NEON_V8;
14268 for (uint32_t n = 9; n < 16; n++) {
14269 for (size_t k = 1; k <= 80; k += 17) {
14270 GemmMicrokernelTester()
14271 .mr(2)
14272 .nr(8)
14273 .kr(8)
14274 .sr(1)
14275 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014276 .n(n)
Marat Dukhancf055852021-06-26 09:05:09 -070014277 .k(k)
14278 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014279 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014280 }
14281 }
14282 }
14283
Frank Barcharde22685a2021-11-12 11:36:58 -080014284 TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_gt_8_subtile) {
Marat Dukhancf055852021-06-26 09:05:09 -070014285 TEST_REQUIRES_ARM_NEON_V8;
14286 for (uint32_t n = 9; n < 16; n++) {
14287 for (size_t k = 1; k <= 80; k += 17) {
14288 for (uint32_t m = 1; m <= 2; m++) {
14289 GemmMicrokernelTester()
14290 .mr(2)
14291 .nr(8)
14292 .kr(8)
14293 .sr(1)
14294 .m(m)
14295 .n(n)
14296 .k(k)
14297 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014298 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014299 }
14300 }
14301 }
14302 }
14303
Frank Barcharde22685a2021-11-12 11:36:58 -080014304 TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_div_8) {
Marat Dukhancf055852021-06-26 09:05:09 -070014305 TEST_REQUIRES_ARM_NEON_V8;
14306 for (uint32_t n = 16; n <= 24; n += 8) {
14307 for (size_t k = 1; k <= 80; k += 17) {
14308 GemmMicrokernelTester()
14309 .mr(2)
14310 .nr(8)
14311 .kr(8)
14312 .sr(1)
14313 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014314 .n(n)
Marat Dukhancf055852021-06-26 09:05:09 -070014315 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014316 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014317 }
14318 }
14319 }
14320
Frank Barcharde22685a2021-11-12 11:36:58 -080014321 TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_div_8_strided_cn) {
Marat Dukhancf055852021-06-26 09:05:09 -070014322 TEST_REQUIRES_ARM_NEON_V8;
14323 for (uint32_t n = 16; n <= 24; n += 8) {
14324 for (size_t k = 1; k <= 80; k += 17) {
14325 GemmMicrokernelTester()
14326 .mr(2)
14327 .nr(8)
14328 .kr(8)
14329 .sr(1)
14330 .m(2)
14331 .n(n)
14332 .k(k)
14333 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014334 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014335 }
14336 }
14337 }
14338
Frank Barcharde22685a2021-11-12 11:36:58 -080014339 TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_div_8_subtile) {
Marat Dukhancf055852021-06-26 09:05:09 -070014340 TEST_REQUIRES_ARM_NEON_V8;
14341 for (uint32_t n = 16; n <= 24; n += 8) {
14342 for (size_t k = 1; k <= 80; k += 17) {
14343 for (uint32_t m = 1; m <= 2; m++) {
14344 GemmMicrokernelTester()
14345 .mr(2)
14346 .nr(8)
14347 .kr(8)
14348 .sr(1)
14349 .m(m)
14350 .n(n)
14351 .k(k)
14352 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014353 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014354 }
14355 }
14356 }
14357 }
14358
Frank Barcharde22685a2021-11-12 11:36:58 -080014359 TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, small_kernel) {
Marat Dukhancf055852021-06-26 09:05:09 -070014360 TEST_REQUIRES_ARM_NEON_V8;
14361 for (size_t k = 1; k <= 80; k += 17) {
14362 GemmMicrokernelTester()
14363 .mr(2)
14364 .nr(8)
14365 .kr(8)
14366 .sr(1)
14367 .m(2)
14368 .n(8)
14369 .k(k)
14370 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080014371 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014372 }
14373 }
14374
Frank Barcharde22685a2021-11-12 11:36:58 -080014375 TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, small_kernel_subtile) {
Marat Dukhancf055852021-06-26 09:05:09 -070014376 TEST_REQUIRES_ARM_NEON_V8;
14377 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014378 for (uint32_t n = 1; n <= 8; n++) {
14379 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhancf055852021-06-26 09:05:09 -070014380 GemmMicrokernelTester()
14381 .mr(2)
14382 .nr(8)
14383 .kr(8)
14384 .sr(1)
14385 .m(m)
14386 .n(n)
14387 .k(k)
14388 .ks(3)
14389 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014390 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014391 }
14392 }
14393 }
14394 }
14395
Frank Barcharde22685a2021-11-12 11:36:58 -080014396 TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_gt_8_small_kernel) {
Marat Dukhancf055852021-06-26 09:05:09 -070014397 TEST_REQUIRES_ARM_NEON_V8;
14398 for (uint32_t n = 9; n < 16; n++) {
14399 for (size_t k = 1; k <= 80; k += 17) {
14400 GemmMicrokernelTester()
14401 .mr(2)
14402 .nr(8)
14403 .kr(8)
14404 .sr(1)
14405 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014406 .n(n)
Marat Dukhancf055852021-06-26 09:05:09 -070014407 .k(k)
14408 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080014409 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014410 }
14411 }
14412 }
14413
Frank Barcharde22685a2021-11-12 11:36:58 -080014414 TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_div_8_small_kernel) {
Marat Dukhancf055852021-06-26 09:05:09 -070014415 TEST_REQUIRES_ARM_NEON_V8;
14416 for (uint32_t n = 16; n <= 24; n += 8) {
14417 for (size_t k = 1; k <= 80; k += 17) {
14418 GemmMicrokernelTester()
14419 .mr(2)
14420 .nr(8)
14421 .kr(8)
14422 .sr(1)
14423 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014424 .n(n)
Marat Dukhancf055852021-06-26 09:05:09 -070014425 .k(k)
14426 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080014427 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014428 }
14429 }
14430 }
14431
Frank Barcharde22685a2021-11-12 11:36:58 -080014432 TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, strided_cm_subtile) {
Marat Dukhancf055852021-06-26 09:05:09 -070014433 TEST_REQUIRES_ARM_NEON_V8;
14434 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014435 for (uint32_t n = 1; n <= 8; n++) {
14436 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhancf055852021-06-26 09:05:09 -070014437 GemmMicrokernelTester()
14438 .mr(2)
14439 .nr(8)
14440 .kr(8)
14441 .sr(1)
14442 .m(m)
14443 .n(n)
14444 .k(k)
14445 .cm_stride(11)
14446 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014447 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014448 }
14449 }
14450 }
14451 }
14452
Frank Barcharde22685a2021-11-12 11:36:58 -080014453 TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, a_offset) {
Marat Dukhancf055852021-06-26 09:05:09 -070014454 TEST_REQUIRES_ARM_NEON_V8;
14455 for (size_t k = 1; k <= 80; k += 17) {
14456 GemmMicrokernelTester()
14457 .mr(2)
14458 .nr(8)
14459 .kr(8)
14460 .sr(1)
14461 .m(2)
14462 .n(8)
14463 .k(k)
14464 .ks(3)
14465 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080014466 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014467 }
14468 }
14469
Frank Barcharde22685a2021-11-12 11:36:58 -080014470 TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, zero) {
Marat Dukhancf055852021-06-26 09:05:09 -070014471 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014472 for (size_t k = 1; k <= 80; k += 17) {
14473 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhancf055852021-06-26 09:05:09 -070014474 GemmMicrokernelTester()
14475 .mr(2)
14476 .nr(8)
14477 .kr(8)
14478 .sr(1)
14479 .m(2)
14480 .n(8)
14481 .k(k)
14482 .ks(3)
14483 .a_offset(163)
14484 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080014485 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014486 }
14487 }
14488 }
14489
Frank Barcharde22685a2021-11-12 11:36:58 -080014490 TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, qmin) {
Marat Dukhancf055852021-06-26 09:05:09 -070014491 TEST_REQUIRES_ARM_NEON_V8;
14492 GemmMicrokernelTester()
14493 .mr(2)
14494 .nr(8)
14495 .kr(8)
14496 .sr(1)
14497 .m(2)
14498 .n(8)
14499 .k(16)
14500 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014501 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014502 }
14503
Frank Barcharde22685a2021-11-12 11:36:58 -080014504 TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, qmax) {
Marat Dukhancf055852021-06-26 09:05:09 -070014505 TEST_REQUIRES_ARM_NEON_V8;
14506 GemmMicrokernelTester()
14507 .mr(2)
14508 .nr(8)
14509 .kr(8)
14510 .sr(1)
14511 .m(2)
14512 .n(8)
14513 .k(16)
14514 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014515 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014516 }
14517
Frank Barcharde22685a2021-11-12 11:36:58 -080014518 TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, strided_cm) {
Marat Dukhancf055852021-06-26 09:05:09 -070014519 TEST_REQUIRES_ARM_NEON_V8;
14520 GemmMicrokernelTester()
14521 .mr(2)
14522 .nr(8)
14523 .kr(8)
14524 .sr(1)
14525 .m(2)
14526 .n(8)
14527 .k(16)
14528 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014529 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhancf055852021-06-26 09:05:09 -070014530 }
14531#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14532
14533
Marat Dukhan18630de2021-06-02 22:20:01 -070014534#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
14535 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_eq_8) {
14536 TEST_REQUIRES_ARM_NEON_DOT;
14537 GemmMicrokernelTester()
14538 .mr(1)
14539 .nr(8)
14540 .kr(4)
14541 .sr(1)
14542 .m(1)
14543 .n(8)
14544 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080014545 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070014546 }
14547
14548 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, strided_cn) {
14549 TEST_REQUIRES_ARM_NEON_DOT;
14550 GemmMicrokernelTester()
14551 .mr(1)
14552 .nr(8)
14553 .kr(4)
14554 .sr(1)
14555 .m(1)
14556 .n(8)
14557 .k(8)
14558 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014559 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070014560 }
14561
14562 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_eq_8_subtile) {
14563 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014564 for (uint32_t n = 1; n <= 8; n++) {
14565 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan18630de2021-06-02 22:20:01 -070014566 GemmMicrokernelTester()
14567 .mr(1)
14568 .nr(8)
14569 .kr(4)
14570 .sr(1)
14571 .m(m)
14572 .n(n)
14573 .k(8)
14574 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014575 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070014576 }
14577 }
14578 }
14579
14580 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_eq_8_subtile_m) {
14581 TEST_REQUIRES_ARM_NEON_DOT;
14582 for (uint32_t m = 1; m <= 1; m++) {
14583 GemmMicrokernelTester()
14584 .mr(1)
14585 .nr(8)
14586 .kr(4)
14587 .sr(1)
14588 .m(m)
14589 .n(8)
14590 .k(8)
14591 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014592 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070014593 }
14594 }
14595
14596 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_eq_8_subtile_n) {
14597 TEST_REQUIRES_ARM_NEON_DOT;
14598 for (uint32_t n = 1; n <= 8; n++) {
14599 GemmMicrokernelTester()
14600 .mr(1)
14601 .nr(8)
14602 .kr(4)
14603 .sr(1)
14604 .m(1)
14605 .n(n)
14606 .k(8)
14607 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014608 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070014609 }
14610 }
14611
14612 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_lt_8) {
14613 TEST_REQUIRES_ARM_NEON_DOT;
14614 for (size_t k = 1; k < 8; k++) {
14615 GemmMicrokernelTester()
14616 .mr(1)
14617 .nr(8)
14618 .kr(4)
14619 .sr(1)
14620 .m(1)
14621 .n(8)
14622 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014623 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070014624 }
14625 }
14626
14627 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_lt_8_subtile) {
14628 TEST_REQUIRES_ARM_NEON_DOT;
14629 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014630 for (uint32_t n = 1; n <= 8; n++) {
14631 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan18630de2021-06-02 22:20:01 -070014632 GemmMicrokernelTester()
14633 .mr(1)
14634 .nr(8)
14635 .kr(4)
14636 .sr(1)
14637 .m(m)
14638 .n(n)
14639 .k(k)
14640 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014641 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070014642 }
14643 }
14644 }
14645 }
14646
14647 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_gt_8) {
14648 TEST_REQUIRES_ARM_NEON_DOT;
14649 for (size_t k = 9; k < 16; k++) {
14650 GemmMicrokernelTester()
14651 .mr(1)
14652 .nr(8)
14653 .kr(4)
14654 .sr(1)
14655 .m(1)
14656 .n(8)
14657 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014658 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070014659 }
14660 }
14661
14662 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_gt_8_subtile) {
14663 TEST_REQUIRES_ARM_NEON_DOT;
14664 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014665 for (uint32_t n = 1; n <= 8; n++) {
14666 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan18630de2021-06-02 22:20:01 -070014667 GemmMicrokernelTester()
14668 .mr(1)
14669 .nr(8)
14670 .kr(4)
14671 .sr(1)
14672 .m(m)
14673 .n(n)
14674 .k(k)
14675 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014676 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070014677 }
14678 }
14679 }
14680 }
14681
14682 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_div_8) {
14683 TEST_REQUIRES_ARM_NEON_DOT;
14684 for (size_t k = 16; k <= 80; k += 8) {
14685 GemmMicrokernelTester()
14686 .mr(1)
14687 .nr(8)
14688 .kr(4)
14689 .sr(1)
14690 .m(1)
14691 .n(8)
14692 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014693 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070014694 }
14695 }
14696
14697 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_div_8_subtile) {
14698 TEST_REQUIRES_ARM_NEON_DOT;
14699 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014700 for (uint32_t n = 1; n <= 8; n++) {
14701 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan18630de2021-06-02 22:20:01 -070014702 GemmMicrokernelTester()
14703 .mr(1)
14704 .nr(8)
14705 .kr(4)
14706 .sr(1)
14707 .m(m)
14708 .n(n)
14709 .k(k)
14710 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014711 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070014712 }
14713 }
14714 }
14715 }
14716
14717 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_gt_8) {
14718 TEST_REQUIRES_ARM_NEON_DOT;
14719 for (uint32_t n = 9; n < 16; n++) {
14720 for (size_t k = 1; k <= 40; k += 9) {
14721 GemmMicrokernelTester()
14722 .mr(1)
14723 .nr(8)
14724 .kr(4)
14725 .sr(1)
14726 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014727 .n(n)
Marat Dukhan18630de2021-06-02 22:20:01 -070014728 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014729 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070014730 }
14731 }
14732 }
14733
14734 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_gt_8_strided_cn) {
14735 TEST_REQUIRES_ARM_NEON_DOT;
14736 for (uint32_t n = 9; n < 16; n++) {
14737 for (size_t k = 1; k <= 40; k += 9) {
14738 GemmMicrokernelTester()
14739 .mr(1)
14740 .nr(8)
14741 .kr(4)
14742 .sr(1)
14743 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014744 .n(n)
Marat Dukhan18630de2021-06-02 22:20:01 -070014745 .k(k)
14746 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014747 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070014748 }
14749 }
14750 }
14751
14752 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_gt_8_subtile) {
14753 TEST_REQUIRES_ARM_NEON_DOT;
14754 for (uint32_t n = 9; n < 16; n++) {
14755 for (size_t k = 1; k <= 40; k += 9) {
14756 for (uint32_t m = 1; m <= 1; m++) {
14757 GemmMicrokernelTester()
14758 .mr(1)
14759 .nr(8)
14760 .kr(4)
14761 .sr(1)
14762 .m(m)
14763 .n(n)
14764 .k(k)
14765 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014766 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070014767 }
14768 }
14769 }
14770 }
14771
14772 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_div_8) {
14773 TEST_REQUIRES_ARM_NEON_DOT;
14774 for (uint32_t n = 16; n <= 24; n += 8) {
14775 for (size_t k = 1; k <= 40; k += 9) {
14776 GemmMicrokernelTester()
14777 .mr(1)
14778 .nr(8)
14779 .kr(4)
14780 .sr(1)
14781 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014782 .n(n)
Marat Dukhan18630de2021-06-02 22:20:01 -070014783 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014784 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070014785 }
14786 }
14787 }
14788
14789 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_div_8_strided_cn) {
14790 TEST_REQUIRES_ARM_NEON_DOT;
14791 for (uint32_t n = 16; n <= 24; n += 8) {
14792 for (size_t k = 1; k <= 40; k += 9) {
14793 GemmMicrokernelTester()
14794 .mr(1)
14795 .nr(8)
14796 .kr(4)
14797 .sr(1)
14798 .m(1)
14799 .n(n)
14800 .k(k)
14801 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014802 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070014803 }
14804 }
14805 }
14806
14807 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_div_8_subtile) {
14808 TEST_REQUIRES_ARM_NEON_DOT;
14809 for (uint32_t n = 16; n <= 24; n += 8) {
14810 for (size_t k = 1; k <= 40; k += 9) {
14811 for (uint32_t m = 1; m <= 1; m++) {
14812 GemmMicrokernelTester()
14813 .mr(1)
14814 .nr(8)
14815 .kr(4)
14816 .sr(1)
14817 .m(m)
14818 .n(n)
14819 .k(k)
14820 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014821 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070014822 }
14823 }
14824 }
14825 }
14826
14827 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, small_kernel) {
14828 TEST_REQUIRES_ARM_NEON_DOT;
14829 for (size_t k = 1; k <= 40; k += 9) {
14830 GemmMicrokernelTester()
14831 .mr(1)
14832 .nr(8)
14833 .kr(4)
14834 .sr(1)
14835 .m(1)
14836 .n(8)
14837 .k(k)
14838 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080014839 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070014840 }
14841 }
14842
14843 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, small_kernel_subtile) {
14844 TEST_REQUIRES_ARM_NEON_DOT;
14845 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014846 for (uint32_t n = 1; n <= 8; n++) {
14847 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan18630de2021-06-02 22:20:01 -070014848 GemmMicrokernelTester()
14849 .mr(1)
14850 .nr(8)
14851 .kr(4)
14852 .sr(1)
14853 .m(m)
14854 .n(n)
14855 .k(k)
14856 .ks(3)
14857 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014858 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070014859 }
14860 }
14861 }
14862 }
14863
14864 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_gt_8_small_kernel) {
14865 TEST_REQUIRES_ARM_NEON_DOT;
14866 for (uint32_t n = 9; n < 16; n++) {
14867 for (size_t k = 1; k <= 40; k += 9) {
14868 GemmMicrokernelTester()
14869 .mr(1)
14870 .nr(8)
14871 .kr(4)
14872 .sr(1)
14873 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014874 .n(n)
Marat Dukhan18630de2021-06-02 22:20:01 -070014875 .k(k)
14876 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080014877 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070014878 }
14879 }
14880 }
14881
14882 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_div_8_small_kernel) {
14883 TEST_REQUIRES_ARM_NEON_DOT;
14884 for (uint32_t n = 16; n <= 24; n += 8) {
14885 for (size_t k = 1; k <= 40; k += 9) {
14886 GemmMicrokernelTester()
14887 .mr(1)
14888 .nr(8)
14889 .kr(4)
14890 .sr(1)
14891 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014892 .n(n)
Marat Dukhan18630de2021-06-02 22:20:01 -070014893 .k(k)
14894 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080014895 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070014896 }
14897 }
14898 }
14899
14900 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, strided_cm_subtile) {
14901 TEST_REQUIRES_ARM_NEON_DOT;
14902 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014903 for (uint32_t n = 1; n <= 8; n++) {
14904 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan18630de2021-06-02 22:20:01 -070014905 GemmMicrokernelTester()
14906 .mr(1)
14907 .nr(8)
14908 .kr(4)
14909 .sr(1)
14910 .m(m)
14911 .n(n)
14912 .k(k)
14913 .cm_stride(11)
14914 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014915 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070014916 }
14917 }
14918 }
14919 }
14920
14921 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, a_offset) {
14922 TEST_REQUIRES_ARM_NEON_DOT;
14923 for (size_t k = 1; k <= 40; k += 9) {
14924 GemmMicrokernelTester()
14925 .mr(1)
14926 .nr(8)
14927 .kr(4)
14928 .sr(1)
14929 .m(1)
14930 .n(8)
14931 .k(k)
14932 .ks(3)
14933 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080014934 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070014935 }
14936 }
14937
14938 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, zero) {
14939 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014940 for (size_t k = 1; k <= 40; k += 9) {
14941 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan18630de2021-06-02 22:20:01 -070014942 GemmMicrokernelTester()
14943 .mr(1)
14944 .nr(8)
14945 .kr(4)
14946 .sr(1)
14947 .m(1)
14948 .n(8)
14949 .k(k)
14950 .ks(3)
14951 .a_offset(43)
14952 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080014953 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070014954 }
14955 }
14956 }
14957
14958 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, qmin) {
14959 TEST_REQUIRES_ARM_NEON_DOT;
14960 GemmMicrokernelTester()
14961 .mr(1)
14962 .nr(8)
14963 .kr(4)
14964 .sr(1)
14965 .m(1)
14966 .n(8)
14967 .k(8)
14968 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014969 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070014970 }
14971
14972 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, qmax) {
14973 TEST_REQUIRES_ARM_NEON_DOT;
14974 GemmMicrokernelTester()
14975 .mr(1)
14976 .nr(8)
14977 .kr(4)
14978 .sr(1)
14979 .m(1)
14980 .n(8)
14981 .k(8)
14982 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014983 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070014984 }
14985
14986 TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, strided_cm) {
14987 TEST_REQUIRES_ARM_NEON_DOT;
14988 GemmMicrokernelTester()
14989 .mr(1)
14990 .nr(8)
14991 .kr(4)
14992 .sr(1)
14993 .m(1)
14994 .n(8)
14995 .k(8)
14996 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014997 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070014998 }
14999#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
15000
15001
Marat Dukhan9b474cf2021-05-25 16:37:48 -070015002#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070015003 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015004 TEST_REQUIRES_X86_SSE2;
15005 GemmMicrokernelTester()
15006 .mr(3)
15007 .nr(4)
15008 .kr(2)
15009 .sr(1)
15010 .m(3)
15011 .n(4)
15012 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080015013 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015014 }
15015
Marat Dukhan801d2c22021-06-02 21:25:05 -070015016 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015017 TEST_REQUIRES_X86_SSE2;
15018 GemmMicrokernelTester()
15019 .mr(3)
15020 .nr(4)
15021 .kr(2)
15022 .sr(1)
15023 .m(3)
15024 .n(4)
15025 .k(8)
15026 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015027 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015028 }
15029
Marat Dukhan801d2c22021-06-02 21:25:05 -070015030 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015031 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015032 for (uint32_t n = 1; n <= 4; n++) {
15033 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015034 GemmMicrokernelTester()
15035 .mr(3)
15036 .nr(4)
15037 .kr(2)
15038 .sr(1)
15039 .m(m)
15040 .n(n)
15041 .k(8)
15042 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015043 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015044 }
15045 }
15046 }
15047
Marat Dukhan801d2c22021-06-02 21:25:05 -070015048 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015049 TEST_REQUIRES_X86_SSE2;
15050 for (uint32_t m = 1; m <= 3; m++) {
15051 GemmMicrokernelTester()
15052 .mr(3)
15053 .nr(4)
15054 .kr(2)
15055 .sr(1)
15056 .m(m)
15057 .n(4)
15058 .k(8)
15059 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015060 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015061 }
15062 }
15063
Marat Dukhan801d2c22021-06-02 21:25:05 -070015064 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015065 TEST_REQUIRES_X86_SSE2;
15066 for (uint32_t n = 1; n <= 4; n++) {
15067 GemmMicrokernelTester()
15068 .mr(3)
15069 .nr(4)
15070 .kr(2)
15071 .sr(1)
15072 .m(3)
15073 .n(n)
15074 .k(8)
15075 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015076 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015077 }
15078 }
15079
Marat Dukhan801d2c22021-06-02 21:25:05 -070015080 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015081 TEST_REQUIRES_X86_SSE2;
15082 for (size_t k = 1; k < 8; k++) {
15083 GemmMicrokernelTester()
15084 .mr(3)
15085 .nr(4)
15086 .kr(2)
15087 .sr(1)
15088 .m(3)
15089 .n(4)
15090 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015091 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015092 }
15093 }
15094
Marat Dukhan801d2c22021-06-02 21:25:05 -070015095 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015096 TEST_REQUIRES_X86_SSE2;
15097 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015098 for (uint32_t n = 1; n <= 4; n++) {
15099 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015100 GemmMicrokernelTester()
15101 .mr(3)
15102 .nr(4)
15103 .kr(2)
15104 .sr(1)
15105 .m(m)
15106 .n(n)
15107 .k(k)
15108 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015109 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015110 }
15111 }
15112 }
15113 }
15114
Marat Dukhan801d2c22021-06-02 21:25:05 -070015115 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015116 TEST_REQUIRES_X86_SSE2;
15117 for (size_t k = 9; k < 16; k++) {
15118 GemmMicrokernelTester()
15119 .mr(3)
15120 .nr(4)
15121 .kr(2)
15122 .sr(1)
15123 .m(3)
15124 .n(4)
15125 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015126 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015127 }
15128 }
15129
Marat Dukhan801d2c22021-06-02 21:25:05 -070015130 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015131 TEST_REQUIRES_X86_SSE2;
15132 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015133 for (uint32_t n = 1; n <= 4; n++) {
15134 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015135 GemmMicrokernelTester()
15136 .mr(3)
15137 .nr(4)
15138 .kr(2)
15139 .sr(1)
15140 .m(m)
15141 .n(n)
15142 .k(k)
15143 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015144 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015145 }
15146 }
15147 }
15148 }
15149
Marat Dukhan801d2c22021-06-02 21:25:05 -070015150 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015151 TEST_REQUIRES_X86_SSE2;
15152 for (size_t k = 16; k <= 80; k += 8) {
15153 GemmMicrokernelTester()
15154 .mr(3)
15155 .nr(4)
15156 .kr(2)
15157 .sr(1)
15158 .m(3)
15159 .n(4)
15160 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015161 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015162 }
15163 }
15164
Marat Dukhan801d2c22021-06-02 21:25:05 -070015165 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015166 TEST_REQUIRES_X86_SSE2;
15167 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015168 for (uint32_t n = 1; n <= 4; n++) {
15169 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015170 GemmMicrokernelTester()
15171 .mr(3)
15172 .nr(4)
15173 .kr(2)
15174 .sr(1)
15175 .m(m)
15176 .n(n)
15177 .k(k)
15178 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015179 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015180 }
15181 }
15182 }
15183 }
15184
Marat Dukhan801d2c22021-06-02 21:25:05 -070015185 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015186 TEST_REQUIRES_X86_SSE2;
15187 for (uint32_t n = 5; n < 8; n++) {
15188 for (size_t k = 1; k <= 40; k += 9) {
15189 GemmMicrokernelTester()
15190 .mr(3)
15191 .nr(4)
15192 .kr(2)
15193 .sr(1)
15194 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015195 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070015196 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015197 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015198 }
15199 }
15200 }
15201
Marat Dukhan801d2c22021-06-02 21:25:05 -070015202 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015203 TEST_REQUIRES_X86_SSE2;
15204 for (uint32_t n = 5; n < 8; n++) {
15205 for (size_t k = 1; k <= 40; k += 9) {
15206 GemmMicrokernelTester()
15207 .mr(3)
15208 .nr(4)
15209 .kr(2)
15210 .sr(1)
15211 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015212 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070015213 .k(k)
15214 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015215 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015216 }
15217 }
15218 }
15219
Marat Dukhan801d2c22021-06-02 21:25:05 -070015220 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015221 TEST_REQUIRES_X86_SSE2;
15222 for (uint32_t n = 5; n < 8; n++) {
15223 for (size_t k = 1; k <= 40; k += 9) {
15224 for (uint32_t m = 1; m <= 3; m++) {
15225 GemmMicrokernelTester()
15226 .mr(3)
15227 .nr(4)
15228 .kr(2)
15229 .sr(1)
15230 .m(m)
15231 .n(n)
15232 .k(k)
15233 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015234 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015235 }
15236 }
15237 }
15238 }
15239
Marat Dukhan801d2c22021-06-02 21:25:05 -070015240 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015241 TEST_REQUIRES_X86_SSE2;
15242 for (uint32_t n = 8; n <= 12; n += 4) {
15243 for (size_t k = 1; k <= 40; k += 9) {
15244 GemmMicrokernelTester()
15245 .mr(3)
15246 .nr(4)
15247 .kr(2)
15248 .sr(1)
15249 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015250 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070015251 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015252 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015253 }
15254 }
15255 }
15256
Marat Dukhan801d2c22021-06-02 21:25:05 -070015257 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015258 TEST_REQUIRES_X86_SSE2;
15259 for (uint32_t n = 8; n <= 12; n += 4) {
15260 for (size_t k = 1; k <= 40; k += 9) {
15261 GemmMicrokernelTester()
15262 .mr(3)
15263 .nr(4)
15264 .kr(2)
15265 .sr(1)
15266 .m(3)
15267 .n(n)
15268 .k(k)
15269 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015270 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015271 }
15272 }
15273 }
15274
Marat Dukhan801d2c22021-06-02 21:25:05 -070015275 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015276 TEST_REQUIRES_X86_SSE2;
15277 for (uint32_t n = 8; n <= 12; n += 4) {
15278 for (size_t k = 1; k <= 40; k += 9) {
15279 for (uint32_t m = 1; m <= 3; m++) {
15280 GemmMicrokernelTester()
15281 .mr(3)
15282 .nr(4)
15283 .kr(2)
15284 .sr(1)
15285 .m(m)
15286 .n(n)
15287 .k(k)
15288 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015289 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015290 }
15291 }
15292 }
15293 }
15294
Marat Dukhan801d2c22021-06-02 21:25:05 -070015295 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, small_kernel) {
15296 TEST_REQUIRES_X86_SSE2;
15297 for (size_t k = 1; k <= 40; k += 9) {
15298 GemmMicrokernelTester()
15299 .mr(3)
15300 .nr(4)
15301 .kr(2)
15302 .sr(1)
15303 .m(3)
15304 .n(4)
15305 .k(k)
15306 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015307 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070015308 }
15309 }
15310
15311 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, small_kernel_subtile) {
15312 TEST_REQUIRES_X86_SSE2;
15313 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015314 for (uint32_t n = 1; n <= 4; n++) {
15315 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070015316 GemmMicrokernelTester()
15317 .mr(3)
15318 .nr(4)
15319 .kr(2)
15320 .sr(1)
15321 .m(m)
15322 .n(n)
15323 .k(k)
15324 .ks(3)
15325 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015326 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070015327 }
15328 }
15329 }
15330 }
15331
15332 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_small_kernel) {
15333 TEST_REQUIRES_X86_SSE2;
15334 for (uint32_t n = 5; n < 8; n++) {
15335 for (size_t k = 1; k <= 40; k += 9) {
15336 GemmMicrokernelTester()
15337 .mr(3)
15338 .nr(4)
15339 .kr(2)
15340 .sr(1)
15341 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015342 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070015343 .k(k)
15344 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015345 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070015346 }
15347 }
15348 }
15349
15350 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_small_kernel) {
15351 TEST_REQUIRES_X86_SSE2;
15352 for (uint32_t n = 8; n <= 12; n += 4) {
15353 for (size_t k = 1; k <= 40; k += 9) {
15354 GemmMicrokernelTester()
15355 .mr(3)
15356 .nr(4)
15357 .kr(2)
15358 .sr(1)
15359 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015360 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070015361 .k(k)
15362 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015363 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070015364 }
15365 }
15366 }
15367
15368 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015369 TEST_REQUIRES_X86_SSE2;
15370 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015371 for (uint32_t n = 1; n <= 4; n++) {
15372 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015373 GemmMicrokernelTester()
15374 .mr(3)
15375 .nr(4)
15376 .kr(2)
15377 .sr(1)
15378 .m(m)
15379 .n(n)
15380 .k(k)
15381 .cm_stride(7)
15382 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015383 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015384 }
15385 }
15386 }
15387 }
15388
Marat Dukhan801d2c22021-06-02 21:25:05 -070015389 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, a_offset) {
15390 TEST_REQUIRES_X86_SSE2;
15391 for (size_t k = 1; k <= 40; k += 9) {
15392 GemmMicrokernelTester()
15393 .mr(3)
15394 .nr(4)
15395 .kr(2)
15396 .sr(1)
15397 .m(3)
15398 .n(4)
15399 .k(k)
15400 .ks(3)
15401 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080015402 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070015403 }
15404 }
15405
15406 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, zero) {
15407 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015408 for (size_t k = 1; k <= 40; k += 9) {
15409 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070015410 GemmMicrokernelTester()
15411 .mr(3)
15412 .nr(4)
15413 .kr(2)
15414 .sr(1)
15415 .m(3)
15416 .n(4)
15417 .k(k)
15418 .ks(3)
15419 .a_offset(127)
15420 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080015421 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070015422 }
15423 }
15424 }
15425
15426 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015427 TEST_REQUIRES_X86_SSE2;
15428 GemmMicrokernelTester()
15429 .mr(3)
15430 .nr(4)
15431 .kr(2)
15432 .sr(1)
15433 .m(3)
15434 .n(4)
15435 .k(8)
15436 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015437 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015438 }
15439
Marat Dukhan801d2c22021-06-02 21:25:05 -070015440 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015441 TEST_REQUIRES_X86_SSE2;
15442 GemmMicrokernelTester()
15443 .mr(3)
15444 .nr(4)
15445 .kr(2)
15446 .sr(1)
15447 .m(3)
15448 .n(4)
15449 .k(8)
15450 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015451 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015452 }
15453
Marat Dukhan801d2c22021-06-02 21:25:05 -070015454 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015455 TEST_REQUIRES_X86_SSE2;
15456 GemmMicrokernelTester()
15457 .mr(3)
15458 .nr(4)
15459 .kr(2)
15460 .sr(1)
15461 .m(3)
15462 .n(4)
15463 .k(8)
15464 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015465 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015466 }
15467#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15468
15469
15470#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070015471 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015472 TEST_REQUIRES_X86_SSE41;
15473 GemmMicrokernelTester()
15474 .mr(3)
15475 .nr(4)
15476 .kr(2)
15477 .sr(1)
15478 .m(3)
15479 .n(4)
15480 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080015481 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015482 }
15483
Marat Dukhan801d2c22021-06-02 21:25:05 -070015484 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015485 TEST_REQUIRES_X86_SSE41;
15486 GemmMicrokernelTester()
15487 .mr(3)
15488 .nr(4)
15489 .kr(2)
15490 .sr(1)
15491 .m(3)
15492 .n(4)
15493 .k(8)
15494 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015495 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015496 }
15497
Marat Dukhan801d2c22021-06-02 21:25:05 -070015498 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015499 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015500 for (uint32_t n = 1; n <= 4; n++) {
15501 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015502 GemmMicrokernelTester()
15503 .mr(3)
15504 .nr(4)
15505 .kr(2)
15506 .sr(1)
15507 .m(m)
15508 .n(n)
15509 .k(8)
15510 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015511 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015512 }
15513 }
15514 }
15515
Marat Dukhan801d2c22021-06-02 21:25:05 -070015516 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015517 TEST_REQUIRES_X86_SSE41;
15518 for (uint32_t m = 1; m <= 3; m++) {
15519 GemmMicrokernelTester()
15520 .mr(3)
15521 .nr(4)
15522 .kr(2)
15523 .sr(1)
15524 .m(m)
15525 .n(4)
15526 .k(8)
15527 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015528 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015529 }
15530 }
15531
Marat Dukhan801d2c22021-06-02 21:25:05 -070015532 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015533 TEST_REQUIRES_X86_SSE41;
15534 for (uint32_t n = 1; n <= 4; n++) {
15535 GemmMicrokernelTester()
15536 .mr(3)
15537 .nr(4)
15538 .kr(2)
15539 .sr(1)
15540 .m(3)
15541 .n(n)
15542 .k(8)
15543 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015544 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015545 }
15546 }
15547
Marat Dukhan801d2c22021-06-02 21:25:05 -070015548 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015549 TEST_REQUIRES_X86_SSE41;
15550 for (size_t k = 1; k < 8; k++) {
15551 GemmMicrokernelTester()
15552 .mr(3)
15553 .nr(4)
15554 .kr(2)
15555 .sr(1)
15556 .m(3)
15557 .n(4)
15558 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015559 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015560 }
15561 }
15562
Marat Dukhan801d2c22021-06-02 21:25:05 -070015563 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015564 TEST_REQUIRES_X86_SSE41;
15565 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015566 for (uint32_t n = 1; n <= 4; n++) {
15567 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015568 GemmMicrokernelTester()
15569 .mr(3)
15570 .nr(4)
15571 .kr(2)
15572 .sr(1)
15573 .m(m)
15574 .n(n)
15575 .k(k)
15576 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015577 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015578 }
15579 }
15580 }
15581 }
15582
Marat Dukhan801d2c22021-06-02 21:25:05 -070015583 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015584 TEST_REQUIRES_X86_SSE41;
15585 for (size_t k = 9; k < 16; k++) {
15586 GemmMicrokernelTester()
15587 .mr(3)
15588 .nr(4)
15589 .kr(2)
15590 .sr(1)
15591 .m(3)
15592 .n(4)
15593 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015594 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015595 }
15596 }
15597
Marat Dukhan801d2c22021-06-02 21:25:05 -070015598 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015599 TEST_REQUIRES_X86_SSE41;
15600 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015601 for (uint32_t n = 1; n <= 4; n++) {
15602 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015603 GemmMicrokernelTester()
15604 .mr(3)
15605 .nr(4)
15606 .kr(2)
15607 .sr(1)
15608 .m(m)
15609 .n(n)
15610 .k(k)
15611 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015612 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015613 }
15614 }
15615 }
15616 }
15617
Marat Dukhan801d2c22021-06-02 21:25:05 -070015618 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015619 TEST_REQUIRES_X86_SSE41;
15620 for (size_t k = 16; k <= 80; k += 8) {
15621 GemmMicrokernelTester()
15622 .mr(3)
15623 .nr(4)
15624 .kr(2)
15625 .sr(1)
15626 .m(3)
15627 .n(4)
15628 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015629 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015630 }
15631 }
15632
Marat Dukhan801d2c22021-06-02 21:25:05 -070015633 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015634 TEST_REQUIRES_X86_SSE41;
15635 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015636 for (uint32_t n = 1; n <= 4; n++) {
15637 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015638 GemmMicrokernelTester()
15639 .mr(3)
15640 .nr(4)
15641 .kr(2)
15642 .sr(1)
15643 .m(m)
15644 .n(n)
15645 .k(k)
15646 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015647 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015648 }
15649 }
15650 }
15651 }
15652
Marat Dukhan801d2c22021-06-02 21:25:05 -070015653 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015654 TEST_REQUIRES_X86_SSE41;
15655 for (uint32_t n = 5; n < 8; n++) {
15656 for (size_t k = 1; k <= 40; k += 9) {
15657 GemmMicrokernelTester()
15658 .mr(3)
15659 .nr(4)
15660 .kr(2)
15661 .sr(1)
15662 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015663 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070015664 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015665 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015666 }
15667 }
15668 }
15669
Marat Dukhan801d2c22021-06-02 21:25:05 -070015670 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015671 TEST_REQUIRES_X86_SSE41;
15672 for (uint32_t n = 5; n < 8; n++) {
15673 for (size_t k = 1; k <= 40; k += 9) {
15674 GemmMicrokernelTester()
15675 .mr(3)
15676 .nr(4)
15677 .kr(2)
15678 .sr(1)
15679 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015680 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070015681 .k(k)
15682 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015683 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015684 }
15685 }
15686 }
15687
Marat Dukhan801d2c22021-06-02 21:25:05 -070015688 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015689 TEST_REQUIRES_X86_SSE41;
15690 for (uint32_t n = 5; n < 8; n++) {
15691 for (size_t k = 1; k <= 40; k += 9) {
15692 for (uint32_t m = 1; m <= 3; m++) {
15693 GemmMicrokernelTester()
15694 .mr(3)
15695 .nr(4)
15696 .kr(2)
15697 .sr(1)
15698 .m(m)
15699 .n(n)
15700 .k(k)
15701 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015702 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015703 }
15704 }
15705 }
15706 }
15707
Marat Dukhan801d2c22021-06-02 21:25:05 -070015708 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015709 TEST_REQUIRES_X86_SSE41;
15710 for (uint32_t n = 8; n <= 12; n += 4) {
15711 for (size_t k = 1; k <= 40; k += 9) {
15712 GemmMicrokernelTester()
15713 .mr(3)
15714 .nr(4)
15715 .kr(2)
15716 .sr(1)
15717 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015718 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070015719 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015720 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015721 }
15722 }
15723 }
15724
Marat Dukhan801d2c22021-06-02 21:25:05 -070015725 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015726 TEST_REQUIRES_X86_SSE41;
15727 for (uint32_t n = 8; n <= 12; n += 4) {
15728 for (size_t k = 1; k <= 40; k += 9) {
15729 GemmMicrokernelTester()
15730 .mr(3)
15731 .nr(4)
15732 .kr(2)
15733 .sr(1)
15734 .m(3)
15735 .n(n)
15736 .k(k)
15737 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015738 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015739 }
15740 }
15741 }
15742
Marat Dukhan801d2c22021-06-02 21:25:05 -070015743 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015744 TEST_REQUIRES_X86_SSE41;
15745 for (uint32_t n = 8; n <= 12; n += 4) {
15746 for (size_t k = 1; k <= 40; k += 9) {
15747 for (uint32_t m = 1; m <= 3; m++) {
15748 GemmMicrokernelTester()
15749 .mr(3)
15750 .nr(4)
15751 .kr(2)
15752 .sr(1)
15753 .m(m)
15754 .n(n)
15755 .k(k)
15756 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015757 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015758 }
15759 }
15760 }
15761 }
15762
Marat Dukhan801d2c22021-06-02 21:25:05 -070015763 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, small_kernel) {
15764 TEST_REQUIRES_X86_SSE41;
15765 for (size_t k = 1; k <= 40; k += 9) {
15766 GemmMicrokernelTester()
15767 .mr(3)
15768 .nr(4)
15769 .kr(2)
15770 .sr(1)
15771 .m(3)
15772 .n(4)
15773 .k(k)
15774 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015775 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070015776 }
15777 }
15778
15779 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, small_kernel_subtile) {
15780 TEST_REQUIRES_X86_SSE41;
15781 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015782 for (uint32_t n = 1; n <= 4; n++) {
15783 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070015784 GemmMicrokernelTester()
15785 .mr(3)
15786 .nr(4)
15787 .kr(2)
15788 .sr(1)
15789 .m(m)
15790 .n(n)
15791 .k(k)
15792 .ks(3)
15793 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015794 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070015795 }
15796 }
15797 }
15798 }
15799
15800 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_small_kernel) {
15801 TEST_REQUIRES_X86_SSE41;
15802 for (uint32_t n = 5; n < 8; n++) {
15803 for (size_t k = 1; k <= 40; k += 9) {
15804 GemmMicrokernelTester()
15805 .mr(3)
15806 .nr(4)
15807 .kr(2)
15808 .sr(1)
15809 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015810 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070015811 .k(k)
15812 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015813 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070015814 }
15815 }
15816 }
15817
15818 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_small_kernel) {
15819 TEST_REQUIRES_X86_SSE41;
15820 for (uint32_t n = 8; n <= 12; n += 4) {
15821 for (size_t k = 1; k <= 40; k += 9) {
15822 GemmMicrokernelTester()
15823 .mr(3)
15824 .nr(4)
15825 .kr(2)
15826 .sr(1)
15827 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015828 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070015829 .k(k)
15830 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015831 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070015832 }
15833 }
15834 }
15835
15836 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015837 TEST_REQUIRES_X86_SSE41;
15838 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015839 for (uint32_t n = 1; n <= 4; n++) {
15840 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015841 GemmMicrokernelTester()
15842 .mr(3)
15843 .nr(4)
15844 .kr(2)
15845 .sr(1)
15846 .m(m)
15847 .n(n)
15848 .k(k)
15849 .cm_stride(7)
15850 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015851 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015852 }
15853 }
15854 }
15855 }
15856
Marat Dukhan801d2c22021-06-02 21:25:05 -070015857 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, a_offset) {
15858 TEST_REQUIRES_X86_SSE41;
15859 for (size_t k = 1; k <= 40; k += 9) {
15860 GemmMicrokernelTester()
15861 .mr(3)
15862 .nr(4)
15863 .kr(2)
15864 .sr(1)
15865 .m(3)
15866 .n(4)
15867 .k(k)
15868 .ks(3)
15869 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080015870 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070015871 }
15872 }
15873
15874 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, zero) {
15875 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015876 for (size_t k = 1; k <= 40; k += 9) {
15877 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070015878 GemmMicrokernelTester()
15879 .mr(3)
15880 .nr(4)
15881 .kr(2)
15882 .sr(1)
15883 .m(3)
15884 .n(4)
15885 .k(k)
15886 .ks(3)
15887 .a_offset(127)
15888 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080015889 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070015890 }
15891 }
15892 }
15893
15894 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015895 TEST_REQUIRES_X86_SSE41;
15896 GemmMicrokernelTester()
15897 .mr(3)
15898 .nr(4)
15899 .kr(2)
15900 .sr(1)
15901 .m(3)
15902 .n(4)
15903 .k(8)
15904 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015905 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015906 }
15907
Marat Dukhan801d2c22021-06-02 21:25:05 -070015908 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015909 TEST_REQUIRES_X86_SSE41;
15910 GemmMicrokernelTester()
15911 .mr(3)
15912 .nr(4)
15913 .kr(2)
15914 .sr(1)
15915 .m(3)
15916 .n(4)
15917 .k(8)
15918 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015919 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015920 }
15921
Marat Dukhan801d2c22021-06-02 21:25:05 -070015922 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015923 TEST_REQUIRES_X86_SSE41;
15924 GemmMicrokernelTester()
15925 .mr(3)
15926 .nr(4)
15927 .kr(2)
15928 .sr(1)
15929 .m(3)
15930 .n(4)
15931 .k(8)
15932 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015933 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015934 }
15935#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15936
15937
15938#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070015939 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015940 TEST_REQUIRES_X86_AVX;
15941 GemmMicrokernelTester()
15942 .mr(2)
15943 .nr(4)
15944 .kr(2)
15945 .sr(1)
15946 .m(2)
15947 .n(4)
15948 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080015949 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015950 }
15951
Marat Dukhan801d2c22021-06-02 21:25:05 -070015952 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015953 TEST_REQUIRES_X86_AVX;
15954 GemmMicrokernelTester()
15955 .mr(2)
15956 .nr(4)
15957 .kr(2)
15958 .sr(1)
15959 .m(2)
15960 .n(4)
15961 .k(8)
15962 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015963 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015964 }
15965
Marat Dukhan801d2c22021-06-02 21:25:05 -070015966 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015967 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015968 for (uint32_t n = 1; n <= 4; n++) {
15969 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015970 GemmMicrokernelTester()
15971 .mr(2)
15972 .nr(4)
15973 .kr(2)
15974 .sr(1)
15975 .m(m)
15976 .n(n)
15977 .k(8)
15978 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015979 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015980 }
15981 }
15982 }
15983
Marat Dukhan801d2c22021-06-02 21:25:05 -070015984 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015985 TEST_REQUIRES_X86_AVX;
15986 for (uint32_t m = 1; m <= 2; m++) {
15987 GemmMicrokernelTester()
15988 .mr(2)
15989 .nr(4)
15990 .kr(2)
15991 .sr(1)
15992 .m(m)
15993 .n(4)
15994 .k(8)
15995 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015996 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015997 }
15998 }
15999
Marat Dukhan801d2c22021-06-02 21:25:05 -070016000 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016001 TEST_REQUIRES_X86_AVX;
16002 for (uint32_t n = 1; n <= 4; n++) {
16003 GemmMicrokernelTester()
16004 .mr(2)
16005 .nr(4)
16006 .kr(2)
16007 .sr(1)
16008 .m(2)
16009 .n(n)
16010 .k(8)
16011 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016012 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016013 }
16014 }
16015
Marat Dukhan801d2c22021-06-02 21:25:05 -070016016 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016017 TEST_REQUIRES_X86_AVX;
16018 for (size_t k = 1; k < 8; k++) {
16019 GemmMicrokernelTester()
16020 .mr(2)
16021 .nr(4)
16022 .kr(2)
16023 .sr(1)
16024 .m(2)
16025 .n(4)
16026 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016027 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016028 }
16029 }
16030
Marat Dukhan801d2c22021-06-02 21:25:05 -070016031 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016032 TEST_REQUIRES_X86_AVX;
16033 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016034 for (uint32_t n = 1; n <= 4; n++) {
16035 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016036 GemmMicrokernelTester()
16037 .mr(2)
16038 .nr(4)
16039 .kr(2)
16040 .sr(1)
16041 .m(m)
16042 .n(n)
16043 .k(k)
16044 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016045 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016046 }
16047 }
16048 }
16049 }
16050
Marat Dukhan801d2c22021-06-02 21:25:05 -070016051 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016052 TEST_REQUIRES_X86_AVX;
16053 for (size_t k = 9; k < 16; k++) {
16054 GemmMicrokernelTester()
16055 .mr(2)
16056 .nr(4)
16057 .kr(2)
16058 .sr(1)
16059 .m(2)
16060 .n(4)
16061 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016062 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016063 }
16064 }
16065
Marat Dukhan801d2c22021-06-02 21:25:05 -070016066 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016067 TEST_REQUIRES_X86_AVX;
16068 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016069 for (uint32_t n = 1; n <= 4; n++) {
16070 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016071 GemmMicrokernelTester()
16072 .mr(2)
16073 .nr(4)
16074 .kr(2)
16075 .sr(1)
16076 .m(m)
16077 .n(n)
16078 .k(k)
16079 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016080 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016081 }
16082 }
16083 }
16084 }
16085
Marat Dukhan801d2c22021-06-02 21:25:05 -070016086 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016087 TEST_REQUIRES_X86_AVX;
16088 for (size_t k = 16; k <= 80; k += 8) {
16089 GemmMicrokernelTester()
16090 .mr(2)
16091 .nr(4)
16092 .kr(2)
16093 .sr(1)
16094 .m(2)
16095 .n(4)
16096 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016097 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016098 }
16099 }
16100
Marat Dukhan801d2c22021-06-02 21:25:05 -070016101 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016102 TEST_REQUIRES_X86_AVX;
16103 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016104 for (uint32_t n = 1; n <= 4; n++) {
16105 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016106 GemmMicrokernelTester()
16107 .mr(2)
16108 .nr(4)
16109 .kr(2)
16110 .sr(1)
16111 .m(m)
16112 .n(n)
16113 .k(k)
16114 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016115 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016116 }
16117 }
16118 }
16119 }
16120
Marat Dukhan801d2c22021-06-02 21:25:05 -070016121 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016122 TEST_REQUIRES_X86_AVX;
16123 for (uint32_t n = 5; n < 8; n++) {
16124 for (size_t k = 1; k <= 40; k += 9) {
16125 GemmMicrokernelTester()
16126 .mr(2)
16127 .nr(4)
16128 .kr(2)
16129 .sr(1)
16130 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016131 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070016132 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016133 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016134 }
16135 }
16136 }
16137
Marat Dukhan801d2c22021-06-02 21:25:05 -070016138 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016139 TEST_REQUIRES_X86_AVX;
16140 for (uint32_t n = 5; n < 8; n++) {
16141 for (size_t k = 1; k <= 40; k += 9) {
16142 GemmMicrokernelTester()
16143 .mr(2)
16144 .nr(4)
16145 .kr(2)
16146 .sr(1)
16147 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016148 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070016149 .k(k)
16150 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016151 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016152 }
16153 }
16154 }
16155
Marat Dukhan801d2c22021-06-02 21:25:05 -070016156 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016157 TEST_REQUIRES_X86_AVX;
16158 for (uint32_t n = 5; n < 8; n++) {
16159 for (size_t k = 1; k <= 40; k += 9) {
16160 for (uint32_t m = 1; m <= 2; m++) {
16161 GemmMicrokernelTester()
16162 .mr(2)
16163 .nr(4)
16164 .kr(2)
16165 .sr(1)
16166 .m(m)
16167 .n(n)
16168 .k(k)
16169 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016170 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016171 }
16172 }
16173 }
16174 }
16175
Marat Dukhan801d2c22021-06-02 21:25:05 -070016176 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016177 TEST_REQUIRES_X86_AVX;
16178 for (uint32_t n = 8; n <= 12; n += 4) {
16179 for (size_t k = 1; k <= 40; k += 9) {
16180 GemmMicrokernelTester()
16181 .mr(2)
16182 .nr(4)
16183 .kr(2)
16184 .sr(1)
16185 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016186 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070016187 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016188 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016189 }
16190 }
16191 }
16192
Marat Dukhan801d2c22021-06-02 21:25:05 -070016193 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016194 TEST_REQUIRES_X86_AVX;
16195 for (uint32_t n = 8; n <= 12; n += 4) {
16196 for (size_t k = 1; k <= 40; k += 9) {
16197 GemmMicrokernelTester()
16198 .mr(2)
16199 .nr(4)
16200 .kr(2)
16201 .sr(1)
16202 .m(2)
16203 .n(n)
16204 .k(k)
16205 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016206 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016207 }
16208 }
16209 }
16210
Marat Dukhan801d2c22021-06-02 21:25:05 -070016211 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016212 TEST_REQUIRES_X86_AVX;
16213 for (uint32_t n = 8; n <= 12; n += 4) {
16214 for (size_t k = 1; k <= 40; k += 9) {
16215 for (uint32_t m = 1; m <= 2; m++) {
16216 GemmMicrokernelTester()
16217 .mr(2)
16218 .nr(4)
16219 .kr(2)
16220 .sr(1)
16221 .m(m)
16222 .n(n)
16223 .k(k)
16224 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016225 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016226 }
16227 }
16228 }
16229 }
16230
Marat Dukhan801d2c22021-06-02 21:25:05 -070016231 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, small_kernel) {
16232 TEST_REQUIRES_X86_AVX;
16233 for (size_t k = 1; k <= 40; k += 9) {
16234 GemmMicrokernelTester()
16235 .mr(2)
16236 .nr(4)
16237 .kr(2)
16238 .sr(1)
16239 .m(2)
16240 .n(4)
16241 .k(k)
16242 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080016243 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070016244 }
16245 }
16246
16247 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, small_kernel_subtile) {
16248 TEST_REQUIRES_X86_AVX;
16249 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016250 for (uint32_t n = 1; n <= 4; n++) {
16251 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070016252 GemmMicrokernelTester()
16253 .mr(2)
16254 .nr(4)
16255 .kr(2)
16256 .sr(1)
16257 .m(m)
16258 .n(n)
16259 .k(k)
16260 .ks(3)
16261 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016262 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070016263 }
16264 }
16265 }
16266 }
16267
16268 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_small_kernel) {
16269 TEST_REQUIRES_X86_AVX;
16270 for (uint32_t n = 5; n < 8; n++) {
16271 for (size_t k = 1; k <= 40; k += 9) {
16272 GemmMicrokernelTester()
16273 .mr(2)
16274 .nr(4)
16275 .kr(2)
16276 .sr(1)
16277 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016278 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070016279 .k(k)
16280 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080016281 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070016282 }
16283 }
16284 }
16285
16286 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_small_kernel) {
16287 TEST_REQUIRES_X86_AVX;
16288 for (uint32_t n = 8; n <= 12; n += 4) {
16289 for (size_t k = 1; k <= 40; k += 9) {
16290 GemmMicrokernelTester()
16291 .mr(2)
16292 .nr(4)
16293 .kr(2)
16294 .sr(1)
16295 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016296 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070016297 .k(k)
16298 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080016299 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070016300 }
16301 }
16302 }
16303
16304 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016305 TEST_REQUIRES_X86_AVX;
16306 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016307 for (uint32_t n = 1; n <= 4; n++) {
16308 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016309 GemmMicrokernelTester()
16310 .mr(2)
16311 .nr(4)
16312 .kr(2)
16313 .sr(1)
16314 .m(m)
16315 .n(n)
16316 .k(k)
16317 .cm_stride(7)
16318 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016319 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016320 }
16321 }
16322 }
16323 }
16324
Marat Dukhan801d2c22021-06-02 21:25:05 -070016325 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, a_offset) {
16326 TEST_REQUIRES_X86_AVX;
16327 for (size_t k = 1; k <= 40; k += 9) {
16328 GemmMicrokernelTester()
16329 .mr(2)
16330 .nr(4)
16331 .kr(2)
16332 .sr(1)
16333 .m(2)
16334 .n(4)
16335 .k(k)
16336 .ks(3)
16337 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080016338 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070016339 }
16340 }
16341
16342 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, zero) {
16343 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016344 for (size_t k = 1; k <= 40; k += 9) {
16345 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070016346 GemmMicrokernelTester()
16347 .mr(2)
16348 .nr(4)
16349 .kr(2)
16350 .sr(1)
16351 .m(2)
16352 .n(4)
16353 .k(k)
16354 .ks(3)
16355 .a_offset(83)
16356 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080016357 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070016358 }
16359 }
16360 }
16361
16362 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016363 TEST_REQUIRES_X86_AVX;
16364 GemmMicrokernelTester()
16365 .mr(2)
16366 .nr(4)
16367 .kr(2)
16368 .sr(1)
16369 .m(2)
16370 .n(4)
16371 .k(8)
16372 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016373 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016374 }
16375
Marat Dukhan801d2c22021-06-02 21:25:05 -070016376 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016377 TEST_REQUIRES_X86_AVX;
16378 GemmMicrokernelTester()
16379 .mr(2)
16380 .nr(4)
16381 .kr(2)
16382 .sr(1)
16383 .m(2)
16384 .n(4)
16385 .k(8)
16386 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016387 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016388 }
16389
Marat Dukhan801d2c22021-06-02 21:25:05 -070016390 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016391 TEST_REQUIRES_X86_AVX;
16392 GemmMicrokernelTester()
16393 .mr(2)
16394 .nr(4)
16395 .kr(2)
16396 .sr(1)
16397 .m(2)
16398 .n(4)
16399 .k(8)
16400 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016401 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016402 }
16403#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16404
16405
16406#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070016407 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016408 TEST_REQUIRES_X86_AVX;
16409 GemmMicrokernelTester()
16410 .mr(3)
16411 .nr(4)
16412 .kr(2)
16413 .sr(1)
16414 .m(3)
16415 .n(4)
16416 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080016417 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016418 }
16419
Marat Dukhan801d2c22021-06-02 21:25:05 -070016420 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016421 TEST_REQUIRES_X86_AVX;
16422 GemmMicrokernelTester()
16423 .mr(3)
16424 .nr(4)
16425 .kr(2)
16426 .sr(1)
16427 .m(3)
16428 .n(4)
16429 .k(8)
16430 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016431 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016432 }
16433
Marat Dukhan801d2c22021-06-02 21:25:05 -070016434 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016435 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016436 for (uint32_t n = 1; n <= 4; n++) {
16437 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016438 GemmMicrokernelTester()
16439 .mr(3)
16440 .nr(4)
16441 .kr(2)
16442 .sr(1)
16443 .m(m)
16444 .n(n)
16445 .k(8)
16446 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016447 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016448 }
16449 }
16450 }
16451
Marat Dukhan801d2c22021-06-02 21:25:05 -070016452 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016453 TEST_REQUIRES_X86_AVX;
16454 for (uint32_t m = 1; m <= 3; m++) {
16455 GemmMicrokernelTester()
16456 .mr(3)
16457 .nr(4)
16458 .kr(2)
16459 .sr(1)
16460 .m(m)
16461 .n(4)
16462 .k(8)
16463 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016464 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016465 }
16466 }
16467
Marat Dukhan801d2c22021-06-02 21:25:05 -070016468 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016469 TEST_REQUIRES_X86_AVX;
16470 for (uint32_t n = 1; n <= 4; n++) {
16471 GemmMicrokernelTester()
16472 .mr(3)
16473 .nr(4)
16474 .kr(2)
16475 .sr(1)
16476 .m(3)
16477 .n(n)
16478 .k(8)
16479 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016480 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016481 }
16482 }
16483
Marat Dukhan801d2c22021-06-02 21:25:05 -070016484 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016485 TEST_REQUIRES_X86_AVX;
16486 for (size_t k = 1; k < 8; k++) {
16487 GemmMicrokernelTester()
16488 .mr(3)
16489 .nr(4)
16490 .kr(2)
16491 .sr(1)
16492 .m(3)
16493 .n(4)
16494 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016495 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016496 }
16497 }
16498
Marat Dukhan801d2c22021-06-02 21:25:05 -070016499 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016500 TEST_REQUIRES_X86_AVX;
16501 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016502 for (uint32_t n = 1; n <= 4; n++) {
16503 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016504 GemmMicrokernelTester()
16505 .mr(3)
16506 .nr(4)
16507 .kr(2)
16508 .sr(1)
16509 .m(m)
16510 .n(n)
16511 .k(k)
16512 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016513 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016514 }
16515 }
16516 }
16517 }
16518
Marat Dukhan801d2c22021-06-02 21:25:05 -070016519 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016520 TEST_REQUIRES_X86_AVX;
16521 for (size_t k = 9; k < 16; k++) {
16522 GemmMicrokernelTester()
16523 .mr(3)
16524 .nr(4)
16525 .kr(2)
16526 .sr(1)
16527 .m(3)
16528 .n(4)
16529 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016530 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016531 }
16532 }
16533
Marat Dukhan801d2c22021-06-02 21:25:05 -070016534 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016535 TEST_REQUIRES_X86_AVX;
16536 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016537 for (uint32_t n = 1; n <= 4; n++) {
16538 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016539 GemmMicrokernelTester()
16540 .mr(3)
16541 .nr(4)
16542 .kr(2)
16543 .sr(1)
16544 .m(m)
16545 .n(n)
16546 .k(k)
16547 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016548 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016549 }
16550 }
16551 }
16552 }
16553
Marat Dukhan801d2c22021-06-02 21:25:05 -070016554 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016555 TEST_REQUIRES_X86_AVX;
16556 for (size_t k = 16; k <= 80; k += 8) {
16557 GemmMicrokernelTester()
16558 .mr(3)
16559 .nr(4)
16560 .kr(2)
16561 .sr(1)
16562 .m(3)
16563 .n(4)
16564 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016565 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016566 }
16567 }
16568
Marat Dukhan801d2c22021-06-02 21:25:05 -070016569 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016570 TEST_REQUIRES_X86_AVX;
16571 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016572 for (uint32_t n = 1; n <= 4; n++) {
16573 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016574 GemmMicrokernelTester()
16575 .mr(3)
16576 .nr(4)
16577 .kr(2)
16578 .sr(1)
16579 .m(m)
16580 .n(n)
16581 .k(k)
16582 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016583 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016584 }
16585 }
16586 }
16587 }
16588
Marat Dukhan801d2c22021-06-02 21:25:05 -070016589 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016590 TEST_REQUIRES_X86_AVX;
16591 for (uint32_t n = 5; n < 8; n++) {
16592 for (size_t k = 1; k <= 40; k += 9) {
16593 GemmMicrokernelTester()
16594 .mr(3)
16595 .nr(4)
16596 .kr(2)
16597 .sr(1)
16598 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016599 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070016600 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016601 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016602 }
16603 }
16604 }
16605
Marat Dukhan801d2c22021-06-02 21:25:05 -070016606 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016607 TEST_REQUIRES_X86_AVX;
16608 for (uint32_t n = 5; n < 8; n++) {
16609 for (size_t k = 1; k <= 40; k += 9) {
16610 GemmMicrokernelTester()
16611 .mr(3)
16612 .nr(4)
16613 .kr(2)
16614 .sr(1)
16615 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016616 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070016617 .k(k)
16618 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016619 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016620 }
16621 }
16622 }
16623
Marat Dukhan801d2c22021-06-02 21:25:05 -070016624 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016625 TEST_REQUIRES_X86_AVX;
16626 for (uint32_t n = 5; n < 8; n++) {
16627 for (size_t k = 1; k <= 40; k += 9) {
16628 for (uint32_t m = 1; m <= 3; m++) {
16629 GemmMicrokernelTester()
16630 .mr(3)
16631 .nr(4)
16632 .kr(2)
16633 .sr(1)
16634 .m(m)
16635 .n(n)
16636 .k(k)
16637 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016638 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016639 }
16640 }
16641 }
16642 }
16643
Marat Dukhan801d2c22021-06-02 21:25:05 -070016644 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016645 TEST_REQUIRES_X86_AVX;
16646 for (uint32_t n = 8; n <= 12; n += 4) {
16647 for (size_t k = 1; k <= 40; k += 9) {
16648 GemmMicrokernelTester()
16649 .mr(3)
16650 .nr(4)
16651 .kr(2)
16652 .sr(1)
16653 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016654 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070016655 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016656 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016657 }
16658 }
16659 }
16660
Marat Dukhan801d2c22021-06-02 21:25:05 -070016661 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016662 TEST_REQUIRES_X86_AVX;
16663 for (uint32_t n = 8; n <= 12; n += 4) {
16664 for (size_t k = 1; k <= 40; k += 9) {
16665 GemmMicrokernelTester()
16666 .mr(3)
16667 .nr(4)
16668 .kr(2)
16669 .sr(1)
16670 .m(3)
16671 .n(n)
16672 .k(k)
16673 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016674 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016675 }
16676 }
16677 }
16678
Marat Dukhan801d2c22021-06-02 21:25:05 -070016679 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016680 TEST_REQUIRES_X86_AVX;
16681 for (uint32_t n = 8; n <= 12; n += 4) {
16682 for (size_t k = 1; k <= 40; k += 9) {
16683 for (uint32_t m = 1; m <= 3; m++) {
16684 GemmMicrokernelTester()
16685 .mr(3)
16686 .nr(4)
16687 .kr(2)
16688 .sr(1)
16689 .m(m)
16690 .n(n)
16691 .k(k)
16692 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016693 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016694 }
16695 }
16696 }
16697 }
16698
Marat Dukhan801d2c22021-06-02 21:25:05 -070016699 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, small_kernel) {
16700 TEST_REQUIRES_X86_AVX;
16701 for (size_t k = 1; k <= 40; k += 9) {
16702 GemmMicrokernelTester()
16703 .mr(3)
16704 .nr(4)
16705 .kr(2)
16706 .sr(1)
16707 .m(3)
16708 .n(4)
16709 .k(k)
16710 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080016711 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070016712 }
16713 }
16714
16715 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, small_kernel_subtile) {
16716 TEST_REQUIRES_X86_AVX;
16717 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016718 for (uint32_t n = 1; n <= 4; n++) {
16719 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070016720 GemmMicrokernelTester()
16721 .mr(3)
16722 .nr(4)
16723 .kr(2)
16724 .sr(1)
16725 .m(m)
16726 .n(n)
16727 .k(k)
16728 .ks(3)
16729 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016730 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070016731 }
16732 }
16733 }
16734 }
16735
16736 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_small_kernel) {
16737 TEST_REQUIRES_X86_AVX;
16738 for (uint32_t n = 5; n < 8; n++) {
16739 for (size_t k = 1; k <= 40; k += 9) {
16740 GemmMicrokernelTester()
16741 .mr(3)
16742 .nr(4)
16743 .kr(2)
16744 .sr(1)
16745 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016746 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070016747 .k(k)
16748 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080016749 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070016750 }
16751 }
16752 }
16753
16754 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_small_kernel) {
16755 TEST_REQUIRES_X86_AVX;
16756 for (uint32_t n = 8; n <= 12; n += 4) {
16757 for (size_t k = 1; k <= 40; k += 9) {
16758 GemmMicrokernelTester()
16759 .mr(3)
16760 .nr(4)
16761 .kr(2)
16762 .sr(1)
16763 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016764 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070016765 .k(k)
16766 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080016767 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070016768 }
16769 }
16770 }
16771
16772 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016773 TEST_REQUIRES_X86_AVX;
16774 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016775 for (uint32_t n = 1; n <= 4; n++) {
16776 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016777 GemmMicrokernelTester()
16778 .mr(3)
16779 .nr(4)
16780 .kr(2)
16781 .sr(1)
16782 .m(m)
16783 .n(n)
16784 .k(k)
16785 .cm_stride(7)
16786 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016787 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016788 }
16789 }
16790 }
16791 }
16792
Marat Dukhan801d2c22021-06-02 21:25:05 -070016793 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, a_offset) {
16794 TEST_REQUIRES_X86_AVX;
16795 for (size_t k = 1; k <= 40; k += 9) {
16796 GemmMicrokernelTester()
16797 .mr(3)
16798 .nr(4)
16799 .kr(2)
16800 .sr(1)
16801 .m(3)
16802 .n(4)
16803 .k(k)
16804 .ks(3)
16805 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080016806 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070016807 }
16808 }
16809
16810 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, zero) {
16811 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016812 for (size_t k = 1; k <= 40; k += 9) {
16813 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070016814 GemmMicrokernelTester()
16815 .mr(3)
16816 .nr(4)
16817 .kr(2)
16818 .sr(1)
16819 .m(3)
16820 .n(4)
16821 .k(k)
16822 .ks(3)
16823 .a_offset(127)
16824 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080016825 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070016826 }
16827 }
16828 }
16829
16830 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016831 TEST_REQUIRES_X86_AVX;
16832 GemmMicrokernelTester()
16833 .mr(3)
16834 .nr(4)
16835 .kr(2)
16836 .sr(1)
16837 .m(3)
16838 .n(4)
16839 .k(8)
16840 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016841 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016842 }
16843
Marat Dukhan801d2c22021-06-02 21:25:05 -070016844 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016845 TEST_REQUIRES_X86_AVX;
16846 GemmMicrokernelTester()
16847 .mr(3)
16848 .nr(4)
16849 .kr(2)
16850 .sr(1)
16851 .m(3)
16852 .n(4)
16853 .k(8)
16854 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016855 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016856 }
16857
Marat Dukhan801d2c22021-06-02 21:25:05 -070016858 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016859 TEST_REQUIRES_X86_AVX;
16860 GemmMicrokernelTester()
16861 .mr(3)
16862 .nr(4)
16863 .kr(2)
16864 .sr(1)
16865 .m(3)
16866 .n(4)
16867 .k(8)
16868 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016869 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016870 }
16871#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16872
16873
16874#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070016875 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016876 TEST_REQUIRES_X86_AVX;
16877 GemmMicrokernelTester()
16878 .mr(4)
16879 .nr(4)
16880 .kr(2)
16881 .sr(1)
16882 .m(4)
16883 .n(4)
16884 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080016885 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016886 }
16887
Marat Dukhan801d2c22021-06-02 21:25:05 -070016888 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016889 TEST_REQUIRES_X86_AVX;
16890 GemmMicrokernelTester()
16891 .mr(4)
16892 .nr(4)
16893 .kr(2)
16894 .sr(1)
16895 .m(4)
16896 .n(4)
16897 .k(8)
16898 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016899 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016900 }
16901
Marat Dukhan801d2c22021-06-02 21:25:05 -070016902 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016903 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016904 for (uint32_t n = 1; n <= 4; n++) {
16905 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016906 GemmMicrokernelTester()
16907 .mr(4)
16908 .nr(4)
16909 .kr(2)
16910 .sr(1)
16911 .m(m)
16912 .n(n)
16913 .k(8)
16914 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016915 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016916 }
16917 }
16918 }
16919
Marat Dukhan801d2c22021-06-02 21:25:05 -070016920 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016921 TEST_REQUIRES_X86_AVX;
16922 for (uint32_t m = 1; m <= 4; m++) {
16923 GemmMicrokernelTester()
16924 .mr(4)
16925 .nr(4)
16926 .kr(2)
16927 .sr(1)
16928 .m(m)
16929 .n(4)
16930 .k(8)
16931 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016932 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016933 }
16934 }
16935
Marat Dukhan801d2c22021-06-02 21:25:05 -070016936 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016937 TEST_REQUIRES_X86_AVX;
16938 for (uint32_t n = 1; n <= 4; n++) {
16939 GemmMicrokernelTester()
16940 .mr(4)
16941 .nr(4)
16942 .kr(2)
16943 .sr(1)
16944 .m(4)
16945 .n(n)
16946 .k(8)
16947 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016948 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016949 }
16950 }
16951
Marat Dukhan801d2c22021-06-02 21:25:05 -070016952 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016953 TEST_REQUIRES_X86_AVX;
16954 for (size_t k = 1; k < 8; k++) {
16955 GemmMicrokernelTester()
16956 .mr(4)
16957 .nr(4)
16958 .kr(2)
16959 .sr(1)
16960 .m(4)
16961 .n(4)
16962 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016963 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016964 }
16965 }
16966
Marat Dukhan801d2c22021-06-02 21:25:05 -070016967 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016968 TEST_REQUIRES_X86_AVX;
16969 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016970 for (uint32_t n = 1; n <= 4; n++) {
16971 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016972 GemmMicrokernelTester()
16973 .mr(4)
16974 .nr(4)
16975 .kr(2)
16976 .sr(1)
16977 .m(m)
16978 .n(n)
16979 .k(k)
16980 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016981 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016982 }
16983 }
16984 }
16985 }
16986
Marat Dukhan801d2c22021-06-02 21:25:05 -070016987 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016988 TEST_REQUIRES_X86_AVX;
16989 for (size_t k = 9; k < 16; k++) {
16990 GemmMicrokernelTester()
16991 .mr(4)
16992 .nr(4)
16993 .kr(2)
16994 .sr(1)
16995 .m(4)
16996 .n(4)
16997 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016998 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016999 }
17000 }
17001
Marat Dukhan801d2c22021-06-02 21:25:05 -070017002 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017003 TEST_REQUIRES_X86_AVX;
17004 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017005 for (uint32_t n = 1; n <= 4; n++) {
17006 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017007 GemmMicrokernelTester()
17008 .mr(4)
17009 .nr(4)
17010 .kr(2)
17011 .sr(1)
17012 .m(m)
17013 .n(n)
17014 .k(k)
17015 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017016 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017017 }
17018 }
17019 }
17020 }
17021
Marat Dukhan801d2c22021-06-02 21:25:05 -070017022 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017023 TEST_REQUIRES_X86_AVX;
17024 for (size_t k = 16; k <= 80; k += 8) {
17025 GemmMicrokernelTester()
17026 .mr(4)
17027 .nr(4)
17028 .kr(2)
17029 .sr(1)
17030 .m(4)
17031 .n(4)
17032 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017033 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017034 }
17035 }
17036
Marat Dukhan801d2c22021-06-02 21:25:05 -070017037 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017038 TEST_REQUIRES_X86_AVX;
17039 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017040 for (uint32_t n = 1; n <= 4; n++) {
17041 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017042 GemmMicrokernelTester()
17043 .mr(4)
17044 .nr(4)
17045 .kr(2)
17046 .sr(1)
17047 .m(m)
17048 .n(n)
17049 .k(k)
17050 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017051 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017052 }
17053 }
17054 }
17055 }
17056
Marat Dukhan801d2c22021-06-02 21:25:05 -070017057 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017058 TEST_REQUIRES_X86_AVX;
17059 for (uint32_t n = 5; n < 8; n++) {
17060 for (size_t k = 1; k <= 40; k += 9) {
17061 GemmMicrokernelTester()
17062 .mr(4)
17063 .nr(4)
17064 .kr(2)
17065 .sr(1)
17066 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017067 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070017068 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017069 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017070 }
17071 }
17072 }
17073
Marat Dukhan801d2c22021-06-02 21:25:05 -070017074 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017075 TEST_REQUIRES_X86_AVX;
17076 for (uint32_t n = 5; n < 8; n++) {
17077 for (size_t k = 1; k <= 40; k += 9) {
17078 GemmMicrokernelTester()
17079 .mr(4)
17080 .nr(4)
17081 .kr(2)
17082 .sr(1)
17083 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017084 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070017085 .k(k)
17086 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017087 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017088 }
17089 }
17090 }
17091
Marat Dukhan801d2c22021-06-02 21:25:05 -070017092 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017093 TEST_REQUIRES_X86_AVX;
17094 for (uint32_t n = 5; n < 8; n++) {
17095 for (size_t k = 1; k <= 40; k += 9) {
17096 for (uint32_t m = 1; m <= 4; m++) {
17097 GemmMicrokernelTester()
17098 .mr(4)
17099 .nr(4)
17100 .kr(2)
17101 .sr(1)
17102 .m(m)
17103 .n(n)
17104 .k(k)
17105 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017106 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017107 }
17108 }
17109 }
17110 }
17111
Marat Dukhan801d2c22021-06-02 21:25:05 -070017112 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017113 TEST_REQUIRES_X86_AVX;
17114 for (uint32_t n = 8; n <= 12; n += 4) {
17115 for (size_t k = 1; k <= 40; k += 9) {
17116 GemmMicrokernelTester()
17117 .mr(4)
17118 .nr(4)
17119 .kr(2)
17120 .sr(1)
17121 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017122 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070017123 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017124 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017125 }
17126 }
17127 }
17128
Marat Dukhan801d2c22021-06-02 21:25:05 -070017129 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017130 TEST_REQUIRES_X86_AVX;
17131 for (uint32_t n = 8; n <= 12; n += 4) {
17132 for (size_t k = 1; k <= 40; k += 9) {
17133 GemmMicrokernelTester()
17134 .mr(4)
17135 .nr(4)
17136 .kr(2)
17137 .sr(1)
17138 .m(4)
17139 .n(n)
17140 .k(k)
17141 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017142 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017143 }
17144 }
17145 }
17146
Marat Dukhan801d2c22021-06-02 21:25:05 -070017147 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017148 TEST_REQUIRES_X86_AVX;
17149 for (uint32_t n = 8; n <= 12; n += 4) {
17150 for (size_t k = 1; k <= 40; k += 9) {
17151 for (uint32_t m = 1; m <= 4; m++) {
17152 GemmMicrokernelTester()
17153 .mr(4)
17154 .nr(4)
17155 .kr(2)
17156 .sr(1)
17157 .m(m)
17158 .n(n)
17159 .k(k)
17160 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017161 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017162 }
17163 }
17164 }
17165 }
17166
Marat Dukhan801d2c22021-06-02 21:25:05 -070017167 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, small_kernel) {
17168 TEST_REQUIRES_X86_AVX;
17169 for (size_t k = 1; k <= 40; k += 9) {
17170 GemmMicrokernelTester()
17171 .mr(4)
17172 .nr(4)
17173 .kr(2)
17174 .sr(1)
17175 .m(4)
17176 .n(4)
17177 .k(k)
17178 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080017179 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070017180 }
17181 }
17182
17183 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, small_kernel_subtile) {
17184 TEST_REQUIRES_X86_AVX;
17185 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017186 for (uint32_t n = 1; n <= 4; n++) {
17187 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070017188 GemmMicrokernelTester()
17189 .mr(4)
17190 .nr(4)
17191 .kr(2)
17192 .sr(1)
17193 .m(m)
17194 .n(n)
17195 .k(k)
17196 .ks(3)
17197 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017198 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070017199 }
17200 }
17201 }
17202 }
17203
17204 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_small_kernel) {
17205 TEST_REQUIRES_X86_AVX;
17206 for (uint32_t n = 5; n < 8; n++) {
17207 for (size_t k = 1; k <= 40; k += 9) {
17208 GemmMicrokernelTester()
17209 .mr(4)
17210 .nr(4)
17211 .kr(2)
17212 .sr(1)
17213 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017214 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070017215 .k(k)
17216 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080017217 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070017218 }
17219 }
17220 }
17221
17222 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_small_kernel) {
17223 TEST_REQUIRES_X86_AVX;
17224 for (uint32_t n = 8; n <= 12; n += 4) {
17225 for (size_t k = 1; k <= 40; k += 9) {
17226 GemmMicrokernelTester()
17227 .mr(4)
17228 .nr(4)
17229 .kr(2)
17230 .sr(1)
17231 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017232 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070017233 .k(k)
17234 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080017235 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070017236 }
17237 }
17238 }
17239
17240 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017241 TEST_REQUIRES_X86_AVX;
17242 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017243 for (uint32_t n = 1; n <= 4; n++) {
17244 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017245 GemmMicrokernelTester()
17246 .mr(4)
17247 .nr(4)
17248 .kr(2)
17249 .sr(1)
17250 .m(m)
17251 .n(n)
17252 .k(k)
17253 .cm_stride(7)
17254 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017255 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017256 }
17257 }
17258 }
17259 }
17260
Marat Dukhan801d2c22021-06-02 21:25:05 -070017261 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, a_offset) {
17262 TEST_REQUIRES_X86_AVX;
17263 for (size_t k = 1; k <= 40; k += 9) {
17264 GemmMicrokernelTester()
17265 .mr(4)
17266 .nr(4)
17267 .kr(2)
17268 .sr(1)
17269 .m(4)
17270 .n(4)
17271 .k(k)
17272 .ks(3)
17273 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080017274 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070017275 }
17276 }
17277
17278 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, zero) {
17279 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017280 for (size_t k = 1; k <= 40; k += 9) {
17281 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070017282 GemmMicrokernelTester()
17283 .mr(4)
17284 .nr(4)
17285 .kr(2)
17286 .sr(1)
17287 .m(4)
17288 .n(4)
17289 .k(k)
17290 .ks(3)
17291 .a_offset(163)
17292 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080017293 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070017294 }
17295 }
17296 }
17297
17298 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017299 TEST_REQUIRES_X86_AVX;
17300 GemmMicrokernelTester()
17301 .mr(4)
17302 .nr(4)
17303 .kr(2)
17304 .sr(1)
17305 .m(4)
17306 .n(4)
17307 .k(8)
17308 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017309 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017310 }
17311
Marat Dukhan801d2c22021-06-02 21:25:05 -070017312 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017313 TEST_REQUIRES_X86_AVX;
17314 GemmMicrokernelTester()
17315 .mr(4)
17316 .nr(4)
17317 .kr(2)
17318 .sr(1)
17319 .m(4)
17320 .n(4)
17321 .k(8)
17322 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017323 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017324 }
17325
Marat Dukhan801d2c22021-06-02 21:25:05 -070017326 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017327 TEST_REQUIRES_X86_AVX;
17328 GemmMicrokernelTester()
17329 .mr(4)
17330 .nr(4)
17331 .kr(2)
17332 .sr(1)
17333 .m(4)
17334 .n(4)
17335 .k(8)
17336 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017337 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017338 }
17339#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17340
17341
17342#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070017343 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017344 TEST_REQUIRES_X86_XOP;
17345 GemmMicrokernelTester()
17346 .mr(2)
17347 .nr(4)
17348 .kr(2)
17349 .sr(1)
17350 .m(2)
17351 .n(4)
17352 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080017353 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017354 }
17355
Marat Dukhan801d2c22021-06-02 21:25:05 -070017356 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017357 TEST_REQUIRES_X86_XOP;
17358 GemmMicrokernelTester()
17359 .mr(2)
17360 .nr(4)
17361 .kr(2)
17362 .sr(1)
17363 .m(2)
17364 .n(4)
17365 .k(8)
17366 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017367 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017368 }
17369
Marat Dukhan801d2c22021-06-02 21:25:05 -070017370 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017371 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017372 for (uint32_t n = 1; n <= 4; n++) {
17373 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017374 GemmMicrokernelTester()
17375 .mr(2)
17376 .nr(4)
17377 .kr(2)
17378 .sr(1)
17379 .m(m)
17380 .n(n)
17381 .k(8)
17382 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017383 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017384 }
17385 }
17386 }
17387
Marat Dukhan801d2c22021-06-02 21:25:05 -070017388 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017389 TEST_REQUIRES_X86_XOP;
17390 for (uint32_t m = 1; m <= 2; m++) {
17391 GemmMicrokernelTester()
17392 .mr(2)
17393 .nr(4)
17394 .kr(2)
17395 .sr(1)
17396 .m(m)
17397 .n(4)
17398 .k(8)
17399 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017400 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017401 }
17402 }
17403
Marat Dukhan801d2c22021-06-02 21:25:05 -070017404 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017405 TEST_REQUIRES_X86_XOP;
17406 for (uint32_t n = 1; n <= 4; n++) {
17407 GemmMicrokernelTester()
17408 .mr(2)
17409 .nr(4)
17410 .kr(2)
17411 .sr(1)
17412 .m(2)
17413 .n(n)
17414 .k(8)
17415 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017416 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017417 }
17418 }
17419
Marat Dukhan801d2c22021-06-02 21:25:05 -070017420 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017421 TEST_REQUIRES_X86_XOP;
17422 for (size_t k = 1; k < 8; k++) {
17423 GemmMicrokernelTester()
17424 .mr(2)
17425 .nr(4)
17426 .kr(2)
17427 .sr(1)
17428 .m(2)
17429 .n(4)
17430 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017431 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017432 }
17433 }
17434
Marat Dukhan801d2c22021-06-02 21:25:05 -070017435 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017436 TEST_REQUIRES_X86_XOP;
17437 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017438 for (uint32_t n = 1; n <= 4; n++) {
17439 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017440 GemmMicrokernelTester()
17441 .mr(2)
17442 .nr(4)
17443 .kr(2)
17444 .sr(1)
17445 .m(m)
17446 .n(n)
17447 .k(k)
17448 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017449 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017450 }
17451 }
17452 }
17453 }
17454
Marat Dukhan801d2c22021-06-02 21:25:05 -070017455 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017456 TEST_REQUIRES_X86_XOP;
17457 for (size_t k = 9; k < 16; k++) {
17458 GemmMicrokernelTester()
17459 .mr(2)
17460 .nr(4)
17461 .kr(2)
17462 .sr(1)
17463 .m(2)
17464 .n(4)
17465 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017466 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017467 }
17468 }
17469
Marat Dukhan801d2c22021-06-02 21:25:05 -070017470 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017471 TEST_REQUIRES_X86_XOP;
17472 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017473 for (uint32_t n = 1; n <= 4; n++) {
17474 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017475 GemmMicrokernelTester()
17476 .mr(2)
17477 .nr(4)
17478 .kr(2)
17479 .sr(1)
17480 .m(m)
17481 .n(n)
17482 .k(k)
17483 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017484 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017485 }
17486 }
17487 }
17488 }
17489
Marat Dukhan801d2c22021-06-02 21:25:05 -070017490 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017491 TEST_REQUIRES_X86_XOP;
17492 for (size_t k = 16; k <= 80; k += 8) {
17493 GemmMicrokernelTester()
17494 .mr(2)
17495 .nr(4)
17496 .kr(2)
17497 .sr(1)
17498 .m(2)
17499 .n(4)
17500 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017501 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017502 }
17503 }
17504
Marat Dukhan801d2c22021-06-02 21:25:05 -070017505 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017506 TEST_REQUIRES_X86_XOP;
17507 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017508 for (uint32_t n = 1; n <= 4; n++) {
17509 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017510 GemmMicrokernelTester()
17511 .mr(2)
17512 .nr(4)
17513 .kr(2)
17514 .sr(1)
17515 .m(m)
17516 .n(n)
17517 .k(k)
17518 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017519 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017520 }
17521 }
17522 }
17523 }
17524
Marat Dukhan801d2c22021-06-02 21:25:05 -070017525 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017526 TEST_REQUIRES_X86_XOP;
17527 for (uint32_t n = 5; n < 8; n++) {
17528 for (size_t k = 1; k <= 40; k += 9) {
17529 GemmMicrokernelTester()
17530 .mr(2)
17531 .nr(4)
17532 .kr(2)
17533 .sr(1)
17534 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017535 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070017536 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017537 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017538 }
17539 }
17540 }
17541
Marat Dukhan801d2c22021-06-02 21:25:05 -070017542 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017543 TEST_REQUIRES_X86_XOP;
17544 for (uint32_t n = 5; n < 8; n++) {
17545 for (size_t k = 1; k <= 40; k += 9) {
17546 GemmMicrokernelTester()
17547 .mr(2)
17548 .nr(4)
17549 .kr(2)
17550 .sr(1)
17551 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017552 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070017553 .k(k)
17554 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017555 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017556 }
17557 }
17558 }
17559
Marat Dukhan801d2c22021-06-02 21:25:05 -070017560 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017561 TEST_REQUIRES_X86_XOP;
17562 for (uint32_t n = 5; n < 8; n++) {
17563 for (size_t k = 1; k <= 40; k += 9) {
17564 for (uint32_t m = 1; m <= 2; m++) {
17565 GemmMicrokernelTester()
17566 .mr(2)
17567 .nr(4)
17568 .kr(2)
17569 .sr(1)
17570 .m(m)
17571 .n(n)
17572 .k(k)
17573 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017574 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017575 }
17576 }
17577 }
17578 }
17579
Marat Dukhan801d2c22021-06-02 21:25:05 -070017580 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017581 TEST_REQUIRES_X86_XOP;
17582 for (uint32_t n = 8; n <= 12; n += 4) {
17583 for (size_t k = 1; k <= 40; k += 9) {
17584 GemmMicrokernelTester()
17585 .mr(2)
17586 .nr(4)
17587 .kr(2)
17588 .sr(1)
17589 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017590 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070017591 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017592 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017593 }
17594 }
17595 }
17596
Marat Dukhan801d2c22021-06-02 21:25:05 -070017597 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017598 TEST_REQUIRES_X86_XOP;
17599 for (uint32_t n = 8; n <= 12; n += 4) {
17600 for (size_t k = 1; k <= 40; k += 9) {
17601 GemmMicrokernelTester()
17602 .mr(2)
17603 .nr(4)
17604 .kr(2)
17605 .sr(1)
17606 .m(2)
17607 .n(n)
17608 .k(k)
17609 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017610 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017611 }
17612 }
17613 }
17614
Marat Dukhan801d2c22021-06-02 21:25:05 -070017615 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017616 TEST_REQUIRES_X86_XOP;
17617 for (uint32_t n = 8; n <= 12; n += 4) {
17618 for (size_t k = 1; k <= 40; k += 9) {
17619 for (uint32_t m = 1; m <= 2; m++) {
17620 GemmMicrokernelTester()
17621 .mr(2)
17622 .nr(4)
17623 .kr(2)
17624 .sr(1)
17625 .m(m)
17626 .n(n)
17627 .k(k)
17628 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017629 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017630 }
17631 }
17632 }
17633 }
17634
Marat Dukhan801d2c22021-06-02 21:25:05 -070017635 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, small_kernel) {
17636 TEST_REQUIRES_X86_XOP;
17637 for (size_t k = 1; k <= 40; k += 9) {
17638 GemmMicrokernelTester()
17639 .mr(2)
17640 .nr(4)
17641 .kr(2)
17642 .sr(1)
17643 .m(2)
17644 .n(4)
17645 .k(k)
17646 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080017647 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070017648 }
17649 }
17650
17651 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, small_kernel_subtile) {
17652 TEST_REQUIRES_X86_XOP;
17653 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017654 for (uint32_t n = 1; n <= 4; n++) {
17655 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070017656 GemmMicrokernelTester()
17657 .mr(2)
17658 .nr(4)
17659 .kr(2)
17660 .sr(1)
17661 .m(m)
17662 .n(n)
17663 .k(k)
17664 .ks(3)
17665 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017666 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070017667 }
17668 }
17669 }
17670 }
17671
17672 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_small_kernel) {
17673 TEST_REQUIRES_X86_XOP;
17674 for (uint32_t n = 5; n < 8; n++) {
17675 for (size_t k = 1; k <= 40; k += 9) {
17676 GemmMicrokernelTester()
17677 .mr(2)
17678 .nr(4)
17679 .kr(2)
17680 .sr(1)
17681 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017682 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070017683 .k(k)
17684 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080017685 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070017686 }
17687 }
17688 }
17689
17690 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_small_kernel) {
17691 TEST_REQUIRES_X86_XOP;
17692 for (uint32_t n = 8; n <= 12; n += 4) {
17693 for (size_t k = 1; k <= 40; k += 9) {
17694 GemmMicrokernelTester()
17695 .mr(2)
17696 .nr(4)
17697 .kr(2)
17698 .sr(1)
17699 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017700 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070017701 .k(k)
17702 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080017703 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070017704 }
17705 }
17706 }
17707
17708 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017709 TEST_REQUIRES_X86_XOP;
17710 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017711 for (uint32_t n = 1; n <= 4; n++) {
17712 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017713 GemmMicrokernelTester()
17714 .mr(2)
17715 .nr(4)
17716 .kr(2)
17717 .sr(1)
17718 .m(m)
17719 .n(n)
17720 .k(k)
17721 .cm_stride(7)
17722 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017723 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017724 }
17725 }
17726 }
17727 }
17728
Marat Dukhan801d2c22021-06-02 21:25:05 -070017729 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, a_offset) {
17730 TEST_REQUIRES_X86_XOP;
17731 for (size_t k = 1; k <= 40; k += 9) {
17732 GemmMicrokernelTester()
17733 .mr(2)
17734 .nr(4)
17735 .kr(2)
17736 .sr(1)
17737 .m(2)
17738 .n(4)
17739 .k(k)
17740 .ks(3)
17741 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080017742 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070017743 }
17744 }
17745
17746 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, zero) {
17747 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017748 for (size_t k = 1; k <= 40; k += 9) {
17749 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070017750 GemmMicrokernelTester()
17751 .mr(2)
17752 .nr(4)
17753 .kr(2)
17754 .sr(1)
17755 .m(2)
17756 .n(4)
17757 .k(k)
17758 .ks(3)
17759 .a_offset(83)
17760 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080017761 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070017762 }
17763 }
17764 }
17765
17766 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017767 TEST_REQUIRES_X86_XOP;
17768 GemmMicrokernelTester()
17769 .mr(2)
17770 .nr(4)
17771 .kr(2)
17772 .sr(1)
17773 .m(2)
17774 .n(4)
17775 .k(8)
17776 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017777 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017778 }
17779
Marat Dukhan801d2c22021-06-02 21:25:05 -070017780 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017781 TEST_REQUIRES_X86_XOP;
17782 GemmMicrokernelTester()
17783 .mr(2)
17784 .nr(4)
17785 .kr(2)
17786 .sr(1)
17787 .m(2)
17788 .n(4)
17789 .k(8)
17790 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017791 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017792 }
17793
Marat Dukhan801d2c22021-06-02 21:25:05 -070017794 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017795 TEST_REQUIRES_X86_XOP;
17796 GemmMicrokernelTester()
17797 .mr(2)
17798 .nr(4)
17799 .kr(2)
17800 .sr(1)
17801 .m(2)
17802 .n(4)
17803 .k(8)
17804 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017805 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017806 }
17807#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17808
17809
17810#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070017811 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017812 TEST_REQUIRES_X86_XOP;
17813 GemmMicrokernelTester()
17814 .mr(3)
17815 .nr(4)
17816 .kr(2)
17817 .sr(1)
17818 .m(3)
17819 .n(4)
17820 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080017821 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017822 }
17823
Marat Dukhan801d2c22021-06-02 21:25:05 -070017824 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017825 TEST_REQUIRES_X86_XOP;
17826 GemmMicrokernelTester()
17827 .mr(3)
17828 .nr(4)
17829 .kr(2)
17830 .sr(1)
17831 .m(3)
17832 .n(4)
17833 .k(8)
17834 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017835 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017836 }
17837
Marat Dukhan801d2c22021-06-02 21:25:05 -070017838 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017839 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017840 for (uint32_t n = 1; n <= 4; n++) {
17841 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017842 GemmMicrokernelTester()
17843 .mr(3)
17844 .nr(4)
17845 .kr(2)
17846 .sr(1)
17847 .m(m)
17848 .n(n)
17849 .k(8)
17850 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017851 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017852 }
17853 }
17854 }
17855
Marat Dukhan801d2c22021-06-02 21:25:05 -070017856 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017857 TEST_REQUIRES_X86_XOP;
17858 for (uint32_t m = 1; m <= 3; m++) {
17859 GemmMicrokernelTester()
17860 .mr(3)
17861 .nr(4)
17862 .kr(2)
17863 .sr(1)
17864 .m(m)
17865 .n(4)
17866 .k(8)
17867 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017868 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017869 }
17870 }
17871
Marat Dukhan801d2c22021-06-02 21:25:05 -070017872 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017873 TEST_REQUIRES_X86_XOP;
17874 for (uint32_t n = 1; n <= 4; n++) {
17875 GemmMicrokernelTester()
17876 .mr(3)
17877 .nr(4)
17878 .kr(2)
17879 .sr(1)
17880 .m(3)
17881 .n(n)
17882 .k(8)
17883 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017884 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017885 }
17886 }
17887
Marat Dukhan801d2c22021-06-02 21:25:05 -070017888 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017889 TEST_REQUIRES_X86_XOP;
17890 for (size_t k = 1; k < 8; k++) {
17891 GemmMicrokernelTester()
17892 .mr(3)
17893 .nr(4)
17894 .kr(2)
17895 .sr(1)
17896 .m(3)
17897 .n(4)
17898 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017899 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017900 }
17901 }
17902
Marat Dukhan801d2c22021-06-02 21:25:05 -070017903 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017904 TEST_REQUIRES_X86_XOP;
17905 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017906 for (uint32_t n = 1; n <= 4; n++) {
17907 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017908 GemmMicrokernelTester()
17909 .mr(3)
17910 .nr(4)
17911 .kr(2)
17912 .sr(1)
17913 .m(m)
17914 .n(n)
17915 .k(k)
17916 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017917 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017918 }
17919 }
17920 }
17921 }
17922
Marat Dukhan801d2c22021-06-02 21:25:05 -070017923 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017924 TEST_REQUIRES_X86_XOP;
17925 for (size_t k = 9; k < 16; k++) {
17926 GemmMicrokernelTester()
17927 .mr(3)
17928 .nr(4)
17929 .kr(2)
17930 .sr(1)
17931 .m(3)
17932 .n(4)
17933 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017934 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017935 }
17936 }
17937
Marat Dukhan801d2c22021-06-02 21:25:05 -070017938 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017939 TEST_REQUIRES_X86_XOP;
17940 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017941 for (uint32_t n = 1; n <= 4; n++) {
17942 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017943 GemmMicrokernelTester()
17944 .mr(3)
17945 .nr(4)
17946 .kr(2)
17947 .sr(1)
17948 .m(m)
17949 .n(n)
17950 .k(k)
17951 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017952 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017953 }
17954 }
17955 }
17956 }
17957
Marat Dukhan801d2c22021-06-02 21:25:05 -070017958 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017959 TEST_REQUIRES_X86_XOP;
17960 for (size_t k = 16; k <= 80; k += 8) {
17961 GemmMicrokernelTester()
17962 .mr(3)
17963 .nr(4)
17964 .kr(2)
17965 .sr(1)
17966 .m(3)
17967 .n(4)
17968 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017969 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017970 }
17971 }
17972
Marat Dukhan801d2c22021-06-02 21:25:05 -070017973 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017974 TEST_REQUIRES_X86_XOP;
17975 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017976 for (uint32_t n = 1; n <= 4; n++) {
17977 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017978 GemmMicrokernelTester()
17979 .mr(3)
17980 .nr(4)
17981 .kr(2)
17982 .sr(1)
17983 .m(m)
17984 .n(n)
17985 .k(k)
17986 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017987 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017988 }
17989 }
17990 }
17991 }
17992
Marat Dukhan801d2c22021-06-02 21:25:05 -070017993 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017994 TEST_REQUIRES_X86_XOP;
17995 for (uint32_t n = 5; n < 8; n++) {
17996 for (size_t k = 1; k <= 40; k += 9) {
17997 GemmMicrokernelTester()
17998 .mr(3)
17999 .nr(4)
18000 .kr(2)
18001 .sr(1)
18002 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018003 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070018004 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018005 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018006 }
18007 }
18008 }
18009
Marat Dukhan801d2c22021-06-02 21:25:05 -070018010 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018011 TEST_REQUIRES_X86_XOP;
18012 for (uint32_t n = 5; n < 8; n++) {
18013 for (size_t k = 1; k <= 40; k += 9) {
18014 GemmMicrokernelTester()
18015 .mr(3)
18016 .nr(4)
18017 .kr(2)
18018 .sr(1)
18019 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018020 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070018021 .k(k)
18022 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080018023 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018024 }
18025 }
18026 }
18027
Marat Dukhan801d2c22021-06-02 21:25:05 -070018028 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018029 TEST_REQUIRES_X86_XOP;
18030 for (uint32_t n = 5; n < 8; n++) {
18031 for (size_t k = 1; k <= 40; k += 9) {
18032 for (uint32_t m = 1; m <= 3; m++) {
18033 GemmMicrokernelTester()
18034 .mr(3)
18035 .nr(4)
18036 .kr(2)
18037 .sr(1)
18038 .m(m)
18039 .n(n)
18040 .k(k)
18041 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018042 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018043 }
18044 }
18045 }
18046 }
18047
Marat Dukhan801d2c22021-06-02 21:25:05 -070018048 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018049 TEST_REQUIRES_X86_XOP;
18050 for (uint32_t n = 8; n <= 12; n += 4) {
18051 for (size_t k = 1; k <= 40; k += 9) {
18052 GemmMicrokernelTester()
18053 .mr(3)
18054 .nr(4)
18055 .kr(2)
18056 .sr(1)
18057 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018058 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070018059 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018060 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018061 }
18062 }
18063 }
18064
Marat Dukhan801d2c22021-06-02 21:25:05 -070018065 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018066 TEST_REQUIRES_X86_XOP;
18067 for (uint32_t n = 8; n <= 12; n += 4) {
18068 for (size_t k = 1; k <= 40; k += 9) {
18069 GemmMicrokernelTester()
18070 .mr(3)
18071 .nr(4)
18072 .kr(2)
18073 .sr(1)
18074 .m(3)
18075 .n(n)
18076 .k(k)
18077 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080018078 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018079 }
18080 }
18081 }
18082
Marat Dukhan801d2c22021-06-02 21:25:05 -070018083 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018084 TEST_REQUIRES_X86_XOP;
18085 for (uint32_t n = 8; n <= 12; n += 4) {
18086 for (size_t k = 1; k <= 40; k += 9) {
18087 for (uint32_t m = 1; m <= 3; m++) {
18088 GemmMicrokernelTester()
18089 .mr(3)
18090 .nr(4)
18091 .kr(2)
18092 .sr(1)
18093 .m(m)
18094 .n(n)
18095 .k(k)
18096 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018097 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018098 }
18099 }
18100 }
18101 }
18102
Marat Dukhan801d2c22021-06-02 21:25:05 -070018103 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, small_kernel) {
18104 TEST_REQUIRES_X86_XOP;
18105 for (size_t k = 1; k <= 40; k += 9) {
18106 GemmMicrokernelTester()
18107 .mr(3)
18108 .nr(4)
18109 .kr(2)
18110 .sr(1)
18111 .m(3)
18112 .n(4)
18113 .k(k)
18114 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080018115 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070018116 }
18117 }
18118
18119 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, small_kernel_subtile) {
18120 TEST_REQUIRES_X86_XOP;
18121 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018122 for (uint32_t n = 1; n <= 4; n++) {
18123 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070018124 GemmMicrokernelTester()
18125 .mr(3)
18126 .nr(4)
18127 .kr(2)
18128 .sr(1)
18129 .m(m)
18130 .n(n)
18131 .k(k)
18132 .ks(3)
18133 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018134 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070018135 }
18136 }
18137 }
18138 }
18139
18140 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_small_kernel) {
18141 TEST_REQUIRES_X86_XOP;
18142 for (uint32_t n = 5; n < 8; n++) {
18143 for (size_t k = 1; k <= 40; k += 9) {
18144 GemmMicrokernelTester()
18145 .mr(3)
18146 .nr(4)
18147 .kr(2)
18148 .sr(1)
18149 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018150 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070018151 .k(k)
18152 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080018153 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070018154 }
18155 }
18156 }
18157
18158 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_small_kernel) {
18159 TEST_REQUIRES_X86_XOP;
18160 for (uint32_t n = 8; n <= 12; n += 4) {
18161 for (size_t k = 1; k <= 40; k += 9) {
18162 GemmMicrokernelTester()
18163 .mr(3)
18164 .nr(4)
18165 .kr(2)
18166 .sr(1)
18167 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018168 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070018169 .k(k)
18170 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080018171 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070018172 }
18173 }
18174 }
18175
18176 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018177 TEST_REQUIRES_X86_XOP;
18178 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018179 for (uint32_t n = 1; n <= 4; n++) {
18180 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018181 GemmMicrokernelTester()
18182 .mr(3)
18183 .nr(4)
18184 .kr(2)
18185 .sr(1)
18186 .m(m)
18187 .n(n)
18188 .k(k)
18189 .cm_stride(7)
18190 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018191 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018192 }
18193 }
18194 }
18195 }
18196
Marat Dukhan801d2c22021-06-02 21:25:05 -070018197 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, a_offset) {
18198 TEST_REQUIRES_X86_XOP;
18199 for (size_t k = 1; k <= 40; k += 9) {
18200 GemmMicrokernelTester()
18201 .mr(3)
18202 .nr(4)
18203 .kr(2)
18204 .sr(1)
18205 .m(3)
18206 .n(4)
18207 .k(k)
18208 .ks(3)
18209 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080018210 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070018211 }
18212 }
18213
18214 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, zero) {
18215 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018216 for (size_t k = 1; k <= 40; k += 9) {
18217 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070018218 GemmMicrokernelTester()
18219 .mr(3)
18220 .nr(4)
18221 .kr(2)
18222 .sr(1)
18223 .m(3)
18224 .n(4)
18225 .k(k)
18226 .ks(3)
18227 .a_offset(127)
18228 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080018229 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070018230 }
18231 }
18232 }
18233
18234 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018235 TEST_REQUIRES_X86_XOP;
18236 GemmMicrokernelTester()
18237 .mr(3)
18238 .nr(4)
18239 .kr(2)
18240 .sr(1)
18241 .m(3)
18242 .n(4)
18243 .k(8)
18244 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018245 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018246 }
18247
Marat Dukhan801d2c22021-06-02 21:25:05 -070018248 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018249 TEST_REQUIRES_X86_XOP;
18250 GemmMicrokernelTester()
18251 .mr(3)
18252 .nr(4)
18253 .kr(2)
18254 .sr(1)
18255 .m(3)
18256 .n(4)
18257 .k(8)
18258 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018259 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018260 }
18261
Marat Dukhan801d2c22021-06-02 21:25:05 -070018262 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018263 TEST_REQUIRES_X86_XOP;
18264 GemmMicrokernelTester()
18265 .mr(3)
18266 .nr(4)
18267 .kr(2)
18268 .sr(1)
18269 .m(3)
18270 .n(4)
18271 .k(8)
18272 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080018273 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018274 }
18275#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18276
18277
18278#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070018279 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018280 TEST_REQUIRES_X86_XOP;
18281 GemmMicrokernelTester()
18282 .mr(4)
18283 .nr(4)
18284 .kr(2)
18285 .sr(1)
18286 .m(4)
18287 .n(4)
18288 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080018289 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018290 }
18291
Marat Dukhan801d2c22021-06-02 21:25:05 -070018292 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018293 TEST_REQUIRES_X86_XOP;
18294 GemmMicrokernelTester()
18295 .mr(4)
18296 .nr(4)
18297 .kr(2)
18298 .sr(1)
18299 .m(4)
18300 .n(4)
18301 .k(8)
18302 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080018303 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018304 }
18305
Marat Dukhan801d2c22021-06-02 21:25:05 -070018306 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018307 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018308 for (uint32_t n = 1; n <= 4; n++) {
18309 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018310 GemmMicrokernelTester()
18311 .mr(4)
18312 .nr(4)
18313 .kr(2)
18314 .sr(1)
18315 .m(m)
18316 .n(n)
18317 .k(8)
18318 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018319 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018320 }
18321 }
18322 }
18323
Marat Dukhan801d2c22021-06-02 21:25:05 -070018324 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018325 TEST_REQUIRES_X86_XOP;
18326 for (uint32_t m = 1; m <= 4; m++) {
18327 GemmMicrokernelTester()
18328 .mr(4)
18329 .nr(4)
18330 .kr(2)
18331 .sr(1)
18332 .m(m)
18333 .n(4)
18334 .k(8)
18335 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018336 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018337 }
18338 }
18339
Marat Dukhan801d2c22021-06-02 21:25:05 -070018340 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018341 TEST_REQUIRES_X86_XOP;
18342 for (uint32_t n = 1; n <= 4; n++) {
18343 GemmMicrokernelTester()
18344 .mr(4)
18345 .nr(4)
18346 .kr(2)
18347 .sr(1)
18348 .m(4)
18349 .n(n)
18350 .k(8)
18351 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018352 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018353 }
18354 }
18355
Marat Dukhan801d2c22021-06-02 21:25:05 -070018356 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018357 TEST_REQUIRES_X86_XOP;
18358 for (size_t k = 1; k < 8; k++) {
18359 GemmMicrokernelTester()
18360 .mr(4)
18361 .nr(4)
18362 .kr(2)
18363 .sr(1)
18364 .m(4)
18365 .n(4)
18366 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018367 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018368 }
18369 }
18370
Marat Dukhan801d2c22021-06-02 21:25:05 -070018371 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018372 TEST_REQUIRES_X86_XOP;
18373 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018374 for (uint32_t n = 1; n <= 4; n++) {
18375 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018376 GemmMicrokernelTester()
18377 .mr(4)
18378 .nr(4)
18379 .kr(2)
18380 .sr(1)
18381 .m(m)
18382 .n(n)
18383 .k(k)
18384 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018385 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018386 }
18387 }
18388 }
18389 }
18390
Marat Dukhan801d2c22021-06-02 21:25:05 -070018391 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018392 TEST_REQUIRES_X86_XOP;
18393 for (size_t k = 9; k < 16; k++) {
18394 GemmMicrokernelTester()
18395 .mr(4)
18396 .nr(4)
18397 .kr(2)
18398 .sr(1)
18399 .m(4)
18400 .n(4)
18401 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018402 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018403 }
18404 }
18405
Marat Dukhan801d2c22021-06-02 21:25:05 -070018406 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018407 TEST_REQUIRES_X86_XOP;
18408 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018409 for (uint32_t n = 1; n <= 4; n++) {
18410 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018411 GemmMicrokernelTester()
18412 .mr(4)
18413 .nr(4)
18414 .kr(2)
18415 .sr(1)
18416 .m(m)
18417 .n(n)
18418 .k(k)
18419 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018420 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018421 }
18422 }
18423 }
18424 }
18425
Marat Dukhan801d2c22021-06-02 21:25:05 -070018426 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018427 TEST_REQUIRES_X86_XOP;
18428 for (size_t k = 16; k <= 80; k += 8) {
18429 GemmMicrokernelTester()
18430 .mr(4)
18431 .nr(4)
18432 .kr(2)
18433 .sr(1)
18434 .m(4)
18435 .n(4)
18436 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018437 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018438 }
18439 }
18440
Marat Dukhan801d2c22021-06-02 21:25:05 -070018441 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018442 TEST_REQUIRES_X86_XOP;
18443 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018444 for (uint32_t n = 1; n <= 4; n++) {
18445 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018446 GemmMicrokernelTester()
18447 .mr(4)
18448 .nr(4)
18449 .kr(2)
18450 .sr(1)
18451 .m(m)
18452 .n(n)
18453 .k(k)
18454 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018455 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018456 }
18457 }
18458 }
18459 }
18460
Marat Dukhan801d2c22021-06-02 21:25:05 -070018461 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018462 TEST_REQUIRES_X86_XOP;
18463 for (uint32_t n = 5; n < 8; n++) {
18464 for (size_t k = 1; k <= 40; k += 9) {
18465 GemmMicrokernelTester()
18466 .mr(4)
18467 .nr(4)
18468 .kr(2)
18469 .sr(1)
18470 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018471 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070018472 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018473 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018474 }
18475 }
18476 }
18477
Marat Dukhan801d2c22021-06-02 21:25:05 -070018478 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018479 TEST_REQUIRES_X86_XOP;
18480 for (uint32_t n = 5; n < 8; n++) {
18481 for (size_t k = 1; k <= 40; k += 9) {
18482 GemmMicrokernelTester()
18483 .mr(4)
18484 .nr(4)
18485 .kr(2)
18486 .sr(1)
18487 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018488 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070018489 .k(k)
18490 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080018491 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018492 }
18493 }
18494 }
18495
Marat Dukhan801d2c22021-06-02 21:25:05 -070018496 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018497 TEST_REQUIRES_X86_XOP;
18498 for (uint32_t n = 5; n < 8; n++) {
18499 for (size_t k = 1; k <= 40; k += 9) {
18500 for (uint32_t m = 1; m <= 4; m++) {
18501 GemmMicrokernelTester()
18502 .mr(4)
18503 .nr(4)
18504 .kr(2)
18505 .sr(1)
18506 .m(m)
18507 .n(n)
18508 .k(k)
18509 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018510 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018511 }
18512 }
18513 }
18514 }
18515
Marat Dukhan801d2c22021-06-02 21:25:05 -070018516 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018517 TEST_REQUIRES_X86_XOP;
18518 for (uint32_t n = 8; n <= 12; n += 4) {
18519 for (size_t k = 1; k <= 40; k += 9) {
18520 GemmMicrokernelTester()
18521 .mr(4)
18522 .nr(4)
18523 .kr(2)
18524 .sr(1)
18525 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018526 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070018527 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018528 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018529 }
18530 }
18531 }
18532
Marat Dukhan801d2c22021-06-02 21:25:05 -070018533 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018534 TEST_REQUIRES_X86_XOP;
18535 for (uint32_t n = 8; n <= 12; n += 4) {
18536 for (size_t k = 1; k <= 40; k += 9) {
18537 GemmMicrokernelTester()
18538 .mr(4)
18539 .nr(4)
18540 .kr(2)
18541 .sr(1)
18542 .m(4)
18543 .n(n)
18544 .k(k)
18545 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080018546 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018547 }
18548 }
18549 }
18550
Marat Dukhan801d2c22021-06-02 21:25:05 -070018551 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018552 TEST_REQUIRES_X86_XOP;
18553 for (uint32_t n = 8; n <= 12; n += 4) {
18554 for (size_t k = 1; k <= 40; k += 9) {
18555 for (uint32_t m = 1; m <= 4; m++) {
18556 GemmMicrokernelTester()
18557 .mr(4)
18558 .nr(4)
18559 .kr(2)
18560 .sr(1)
18561 .m(m)
18562 .n(n)
18563 .k(k)
18564 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018565 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018566 }
18567 }
18568 }
18569 }
18570
Marat Dukhan801d2c22021-06-02 21:25:05 -070018571 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, small_kernel) {
18572 TEST_REQUIRES_X86_XOP;
18573 for (size_t k = 1; k <= 40; k += 9) {
18574 GemmMicrokernelTester()
18575 .mr(4)
18576 .nr(4)
18577 .kr(2)
18578 .sr(1)
18579 .m(4)
18580 .n(4)
18581 .k(k)
18582 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080018583 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070018584 }
18585 }
18586
18587 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, small_kernel_subtile) {
18588 TEST_REQUIRES_X86_XOP;
18589 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018590 for (uint32_t n = 1; n <= 4; n++) {
18591 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070018592 GemmMicrokernelTester()
18593 .mr(4)
18594 .nr(4)
18595 .kr(2)
18596 .sr(1)
18597 .m(m)
18598 .n(n)
18599 .k(k)
18600 .ks(3)
18601 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018602 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070018603 }
18604 }
18605 }
18606 }
18607
18608 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_small_kernel) {
18609 TEST_REQUIRES_X86_XOP;
18610 for (uint32_t n = 5; n < 8; n++) {
18611 for (size_t k = 1; k <= 40; k += 9) {
18612 GemmMicrokernelTester()
18613 .mr(4)
18614 .nr(4)
18615 .kr(2)
18616 .sr(1)
18617 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018618 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070018619 .k(k)
18620 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080018621 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070018622 }
18623 }
18624 }
18625
18626 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_small_kernel) {
18627 TEST_REQUIRES_X86_XOP;
18628 for (uint32_t n = 8; n <= 12; n += 4) {
18629 for (size_t k = 1; k <= 40; k += 9) {
18630 GemmMicrokernelTester()
18631 .mr(4)
18632 .nr(4)
18633 .kr(2)
18634 .sr(1)
18635 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018636 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070018637 .k(k)
18638 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080018639 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070018640 }
18641 }
18642 }
18643
18644 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018645 TEST_REQUIRES_X86_XOP;
18646 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018647 for (uint32_t n = 1; n <= 4; n++) {
18648 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018649 GemmMicrokernelTester()
18650 .mr(4)
18651 .nr(4)
18652 .kr(2)
18653 .sr(1)
18654 .m(m)
18655 .n(n)
18656 .k(k)
18657 .cm_stride(7)
18658 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018659 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018660 }
18661 }
18662 }
18663 }
18664
Marat Dukhan801d2c22021-06-02 21:25:05 -070018665 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, a_offset) {
18666 TEST_REQUIRES_X86_XOP;
18667 for (size_t k = 1; k <= 40; k += 9) {
18668 GemmMicrokernelTester()
18669 .mr(4)
18670 .nr(4)
18671 .kr(2)
18672 .sr(1)
18673 .m(4)
18674 .n(4)
18675 .k(k)
18676 .ks(3)
18677 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080018678 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070018679 }
18680 }
18681
18682 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, zero) {
18683 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018684 for (size_t k = 1; k <= 40; k += 9) {
18685 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070018686 GemmMicrokernelTester()
18687 .mr(4)
18688 .nr(4)
18689 .kr(2)
18690 .sr(1)
18691 .m(4)
18692 .n(4)
18693 .k(k)
18694 .ks(3)
18695 .a_offset(163)
18696 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080018697 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070018698 }
18699 }
18700 }
18701
18702 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018703 TEST_REQUIRES_X86_XOP;
18704 GemmMicrokernelTester()
18705 .mr(4)
18706 .nr(4)
18707 .kr(2)
18708 .sr(1)
18709 .m(4)
18710 .n(4)
18711 .k(8)
18712 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018713 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018714 }
18715
Marat Dukhan801d2c22021-06-02 21:25:05 -070018716 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018717 TEST_REQUIRES_X86_XOP;
18718 GemmMicrokernelTester()
18719 .mr(4)
18720 .nr(4)
18721 .kr(2)
18722 .sr(1)
18723 .m(4)
18724 .n(4)
18725 .k(8)
18726 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018727 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018728 }
18729
Marat Dukhan801d2c22021-06-02 21:25:05 -070018730 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018731 TEST_REQUIRES_X86_XOP;
18732 GemmMicrokernelTester()
18733 .mr(4)
18734 .nr(4)
18735 .kr(2)
18736 .sr(1)
18737 .m(4)
18738 .n(4)
18739 .k(8)
18740 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080018741 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018742 }
18743#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18744
18745
18746#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070018747 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018748 TEST_REQUIRES_X86_SSE2;
18749 GemmMicrokernelTester()
18750 .mr(1)
18751 .nr(4)
18752 .kr(2)
18753 .sr(1)
18754 .m(1)
18755 .n(4)
18756 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080018757 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018758 }
18759
Marat Dukhan801d2c22021-06-02 21:25:05 -070018760 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018761 TEST_REQUIRES_X86_SSE2;
18762 GemmMicrokernelTester()
18763 .mr(1)
18764 .nr(4)
18765 .kr(2)
18766 .sr(1)
18767 .m(1)
18768 .n(4)
18769 .k(8)
18770 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080018771 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018772 }
18773
Marat Dukhan801d2c22021-06-02 21:25:05 -070018774 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018775 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018776 for (uint32_t n = 1; n <= 4; n++) {
18777 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018778 GemmMicrokernelTester()
18779 .mr(1)
18780 .nr(4)
18781 .kr(2)
18782 .sr(1)
18783 .m(m)
18784 .n(n)
18785 .k(8)
18786 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018787 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018788 }
18789 }
18790 }
18791
Marat Dukhan801d2c22021-06-02 21:25:05 -070018792 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018793 TEST_REQUIRES_X86_SSE2;
18794 for (uint32_t m = 1; m <= 1; m++) {
18795 GemmMicrokernelTester()
18796 .mr(1)
18797 .nr(4)
18798 .kr(2)
18799 .sr(1)
18800 .m(m)
18801 .n(4)
18802 .k(8)
18803 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018804 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018805 }
18806 }
18807
Marat Dukhan801d2c22021-06-02 21:25:05 -070018808 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018809 TEST_REQUIRES_X86_SSE2;
18810 for (uint32_t n = 1; n <= 4; n++) {
18811 GemmMicrokernelTester()
18812 .mr(1)
18813 .nr(4)
18814 .kr(2)
18815 .sr(1)
18816 .m(1)
18817 .n(n)
18818 .k(8)
18819 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018820 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018821 }
18822 }
18823
Marat Dukhan801d2c22021-06-02 21:25:05 -070018824 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018825 TEST_REQUIRES_X86_SSE2;
18826 for (size_t k = 1; k < 8; k++) {
18827 GemmMicrokernelTester()
18828 .mr(1)
18829 .nr(4)
18830 .kr(2)
18831 .sr(1)
18832 .m(1)
18833 .n(4)
18834 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018835 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018836 }
18837 }
18838
Marat Dukhan801d2c22021-06-02 21:25:05 -070018839 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018840 TEST_REQUIRES_X86_SSE2;
18841 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018842 for (uint32_t n = 1; n <= 4; n++) {
18843 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018844 GemmMicrokernelTester()
18845 .mr(1)
18846 .nr(4)
18847 .kr(2)
18848 .sr(1)
18849 .m(m)
18850 .n(n)
18851 .k(k)
18852 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018853 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018854 }
18855 }
18856 }
18857 }
18858
Marat Dukhan801d2c22021-06-02 21:25:05 -070018859 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018860 TEST_REQUIRES_X86_SSE2;
18861 for (size_t k = 9; k < 16; k++) {
18862 GemmMicrokernelTester()
18863 .mr(1)
18864 .nr(4)
18865 .kr(2)
18866 .sr(1)
18867 .m(1)
18868 .n(4)
18869 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018870 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018871 }
18872 }
18873
Marat Dukhan801d2c22021-06-02 21:25:05 -070018874 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018875 TEST_REQUIRES_X86_SSE2;
18876 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018877 for (uint32_t n = 1; n <= 4; n++) {
18878 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018879 GemmMicrokernelTester()
18880 .mr(1)
18881 .nr(4)
18882 .kr(2)
18883 .sr(1)
18884 .m(m)
18885 .n(n)
18886 .k(k)
18887 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018888 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018889 }
18890 }
18891 }
18892 }
18893
Marat Dukhan801d2c22021-06-02 21:25:05 -070018894 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018895 TEST_REQUIRES_X86_SSE2;
18896 for (size_t k = 16; k <= 80; k += 8) {
18897 GemmMicrokernelTester()
18898 .mr(1)
18899 .nr(4)
18900 .kr(2)
18901 .sr(1)
18902 .m(1)
18903 .n(4)
18904 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018905 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018906 }
18907 }
18908
Marat Dukhan801d2c22021-06-02 21:25:05 -070018909 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018910 TEST_REQUIRES_X86_SSE2;
18911 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018912 for (uint32_t n = 1; n <= 4; n++) {
18913 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018914 GemmMicrokernelTester()
18915 .mr(1)
18916 .nr(4)
18917 .kr(2)
18918 .sr(1)
18919 .m(m)
18920 .n(n)
18921 .k(k)
18922 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018923 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018924 }
18925 }
18926 }
18927 }
18928
Marat Dukhan801d2c22021-06-02 21:25:05 -070018929 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018930 TEST_REQUIRES_X86_SSE2;
18931 for (uint32_t n = 5; n < 8; n++) {
18932 for (size_t k = 1; k <= 40; k += 9) {
18933 GemmMicrokernelTester()
18934 .mr(1)
18935 .nr(4)
18936 .kr(2)
18937 .sr(1)
18938 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018939 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070018940 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018941 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018942 }
18943 }
18944 }
18945
Marat Dukhan801d2c22021-06-02 21:25:05 -070018946 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018947 TEST_REQUIRES_X86_SSE2;
18948 for (uint32_t n = 5; n < 8; n++) {
18949 for (size_t k = 1; k <= 40; k += 9) {
18950 GemmMicrokernelTester()
18951 .mr(1)
18952 .nr(4)
18953 .kr(2)
18954 .sr(1)
18955 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018956 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070018957 .k(k)
18958 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080018959 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018960 }
18961 }
18962 }
18963
Marat Dukhan801d2c22021-06-02 21:25:05 -070018964 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018965 TEST_REQUIRES_X86_SSE2;
18966 for (uint32_t n = 5; n < 8; n++) {
18967 for (size_t k = 1; k <= 40; k += 9) {
18968 for (uint32_t m = 1; m <= 1; m++) {
18969 GemmMicrokernelTester()
18970 .mr(1)
18971 .nr(4)
18972 .kr(2)
18973 .sr(1)
18974 .m(m)
18975 .n(n)
18976 .k(k)
18977 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018978 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018979 }
18980 }
18981 }
18982 }
18983
Marat Dukhan801d2c22021-06-02 21:25:05 -070018984 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018985 TEST_REQUIRES_X86_SSE2;
18986 for (uint32_t n = 8; n <= 12; n += 4) {
18987 for (size_t k = 1; k <= 40; k += 9) {
18988 GemmMicrokernelTester()
18989 .mr(1)
18990 .nr(4)
18991 .kr(2)
18992 .sr(1)
18993 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018994 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070018995 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018996 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018997 }
18998 }
18999 }
19000
Marat Dukhan801d2c22021-06-02 21:25:05 -070019001 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019002 TEST_REQUIRES_X86_SSE2;
19003 for (uint32_t n = 8; n <= 12; n += 4) {
19004 for (size_t k = 1; k <= 40; k += 9) {
19005 GemmMicrokernelTester()
19006 .mr(1)
19007 .nr(4)
19008 .kr(2)
19009 .sr(1)
19010 .m(1)
19011 .n(n)
19012 .k(k)
19013 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080019014 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019015 }
19016 }
19017 }
19018
Marat Dukhan801d2c22021-06-02 21:25:05 -070019019 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019020 TEST_REQUIRES_X86_SSE2;
19021 for (uint32_t n = 8; n <= 12; n += 4) {
19022 for (size_t k = 1; k <= 40; k += 9) {
19023 for (uint32_t m = 1; m <= 1; m++) {
19024 GemmMicrokernelTester()
19025 .mr(1)
19026 .nr(4)
19027 .kr(2)
19028 .sr(1)
19029 .m(m)
19030 .n(n)
19031 .k(k)
19032 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019033 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019034 }
19035 }
19036 }
19037 }
19038
Marat Dukhan801d2c22021-06-02 21:25:05 -070019039 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, small_kernel) {
19040 TEST_REQUIRES_X86_SSE2;
19041 for (size_t k = 1; k <= 40; k += 9) {
19042 GemmMicrokernelTester()
19043 .mr(1)
19044 .nr(4)
19045 .kr(2)
19046 .sr(1)
19047 .m(1)
19048 .n(4)
19049 .k(k)
19050 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080019051 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070019052 }
19053 }
19054
19055 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, small_kernel_subtile) {
19056 TEST_REQUIRES_X86_SSE2;
19057 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019058 for (uint32_t n = 1; n <= 4; n++) {
19059 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070019060 GemmMicrokernelTester()
19061 .mr(1)
19062 .nr(4)
19063 .kr(2)
19064 .sr(1)
19065 .m(m)
19066 .n(n)
19067 .k(k)
19068 .ks(3)
19069 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019070 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070019071 }
19072 }
19073 }
19074 }
19075
19076 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_small_kernel) {
19077 TEST_REQUIRES_X86_SSE2;
19078 for (uint32_t n = 5; n < 8; n++) {
19079 for (size_t k = 1; k <= 40; k += 9) {
19080 GemmMicrokernelTester()
19081 .mr(1)
19082 .nr(4)
19083 .kr(2)
19084 .sr(1)
19085 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019086 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070019087 .k(k)
19088 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080019089 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070019090 }
19091 }
19092 }
19093
19094 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_small_kernel) {
19095 TEST_REQUIRES_X86_SSE2;
19096 for (uint32_t n = 8; n <= 12; n += 4) {
19097 for (size_t k = 1; k <= 40; k += 9) {
19098 GemmMicrokernelTester()
19099 .mr(1)
19100 .nr(4)
19101 .kr(2)
19102 .sr(1)
19103 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019104 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070019105 .k(k)
19106 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080019107 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070019108 }
19109 }
19110 }
19111
19112 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019113 TEST_REQUIRES_X86_SSE2;
19114 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019115 for (uint32_t n = 1; n <= 4; n++) {
19116 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019117 GemmMicrokernelTester()
19118 .mr(1)
19119 .nr(4)
19120 .kr(2)
19121 .sr(1)
19122 .m(m)
19123 .n(n)
19124 .k(k)
19125 .cm_stride(7)
19126 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019127 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019128 }
19129 }
19130 }
19131 }
19132
Marat Dukhan801d2c22021-06-02 21:25:05 -070019133 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, a_offset) {
19134 TEST_REQUIRES_X86_SSE2;
19135 for (size_t k = 1; k <= 40; k += 9) {
19136 GemmMicrokernelTester()
19137 .mr(1)
19138 .nr(4)
19139 .kr(2)
19140 .sr(1)
19141 .m(1)
19142 .n(4)
19143 .k(k)
19144 .ks(3)
19145 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080019146 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070019147 }
19148 }
19149
19150 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, zero) {
19151 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019152 for (size_t k = 1; k <= 40; k += 9) {
19153 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070019154 GemmMicrokernelTester()
19155 .mr(1)
19156 .nr(4)
19157 .kr(2)
19158 .sr(1)
19159 .m(1)
19160 .n(4)
19161 .k(k)
19162 .ks(3)
19163 .a_offset(43)
19164 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080019165 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070019166 }
19167 }
19168 }
19169
19170 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019171 TEST_REQUIRES_X86_SSE2;
19172 GemmMicrokernelTester()
19173 .mr(1)
19174 .nr(4)
19175 .kr(2)
19176 .sr(1)
19177 .m(1)
19178 .n(4)
19179 .k(8)
19180 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019181 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019182 }
19183
Marat Dukhan801d2c22021-06-02 21:25:05 -070019184 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019185 TEST_REQUIRES_X86_SSE2;
19186 GemmMicrokernelTester()
19187 .mr(1)
19188 .nr(4)
19189 .kr(2)
19190 .sr(1)
19191 .m(1)
19192 .n(4)
19193 .k(8)
19194 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019195 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019196 }
19197
Marat Dukhan801d2c22021-06-02 21:25:05 -070019198 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019199 TEST_REQUIRES_X86_SSE2;
19200 GemmMicrokernelTester()
19201 .mr(1)
19202 .nr(4)
19203 .kr(2)
19204 .sr(1)
19205 .m(1)
19206 .n(4)
19207 .k(8)
19208 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080019209 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019210 }
19211#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19212
19213
19214#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070019215 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019216 TEST_REQUIRES_X86_SSE2;
19217 GemmMicrokernelTester()
19218 .mr(2)
19219 .nr(4)
19220 .kr(2)
19221 .sr(1)
19222 .m(2)
19223 .n(4)
19224 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080019225 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019226 }
19227
Marat Dukhan801d2c22021-06-02 21:25:05 -070019228 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019229 TEST_REQUIRES_X86_SSE2;
19230 GemmMicrokernelTester()
19231 .mr(2)
19232 .nr(4)
19233 .kr(2)
19234 .sr(1)
19235 .m(2)
19236 .n(4)
19237 .k(8)
19238 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080019239 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019240 }
19241
Marat Dukhan801d2c22021-06-02 21:25:05 -070019242 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019243 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019244 for (uint32_t n = 1; n <= 4; n++) {
19245 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019246 GemmMicrokernelTester()
19247 .mr(2)
19248 .nr(4)
19249 .kr(2)
19250 .sr(1)
19251 .m(m)
19252 .n(n)
19253 .k(8)
19254 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019255 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019256 }
19257 }
19258 }
19259
Marat Dukhan801d2c22021-06-02 21:25:05 -070019260 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019261 TEST_REQUIRES_X86_SSE2;
19262 for (uint32_t m = 1; m <= 2; m++) {
19263 GemmMicrokernelTester()
19264 .mr(2)
19265 .nr(4)
19266 .kr(2)
19267 .sr(1)
19268 .m(m)
19269 .n(4)
19270 .k(8)
19271 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019272 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019273 }
19274 }
19275
Marat Dukhan801d2c22021-06-02 21:25:05 -070019276 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019277 TEST_REQUIRES_X86_SSE2;
19278 for (uint32_t n = 1; n <= 4; n++) {
19279 GemmMicrokernelTester()
19280 .mr(2)
19281 .nr(4)
19282 .kr(2)
19283 .sr(1)
19284 .m(2)
19285 .n(n)
19286 .k(8)
19287 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019288 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019289 }
19290 }
19291
Marat Dukhan801d2c22021-06-02 21:25:05 -070019292 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019293 TEST_REQUIRES_X86_SSE2;
19294 for (size_t k = 1; k < 8; k++) {
19295 GemmMicrokernelTester()
19296 .mr(2)
19297 .nr(4)
19298 .kr(2)
19299 .sr(1)
19300 .m(2)
19301 .n(4)
19302 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019303 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019304 }
19305 }
19306
Marat Dukhan801d2c22021-06-02 21:25:05 -070019307 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019308 TEST_REQUIRES_X86_SSE2;
19309 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019310 for (uint32_t n = 1; n <= 4; n++) {
19311 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019312 GemmMicrokernelTester()
19313 .mr(2)
19314 .nr(4)
19315 .kr(2)
19316 .sr(1)
19317 .m(m)
19318 .n(n)
19319 .k(k)
19320 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019321 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019322 }
19323 }
19324 }
19325 }
19326
Marat Dukhan801d2c22021-06-02 21:25:05 -070019327 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019328 TEST_REQUIRES_X86_SSE2;
19329 for (size_t k = 9; k < 16; k++) {
19330 GemmMicrokernelTester()
19331 .mr(2)
19332 .nr(4)
19333 .kr(2)
19334 .sr(1)
19335 .m(2)
19336 .n(4)
19337 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019338 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019339 }
19340 }
19341
Marat Dukhan801d2c22021-06-02 21:25:05 -070019342 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019343 TEST_REQUIRES_X86_SSE2;
19344 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019345 for (uint32_t n = 1; n <= 4; n++) {
19346 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019347 GemmMicrokernelTester()
19348 .mr(2)
19349 .nr(4)
19350 .kr(2)
19351 .sr(1)
19352 .m(m)
19353 .n(n)
19354 .k(k)
19355 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019356 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019357 }
19358 }
19359 }
19360 }
19361
Marat Dukhan801d2c22021-06-02 21:25:05 -070019362 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019363 TEST_REQUIRES_X86_SSE2;
19364 for (size_t k = 16; k <= 80; k += 8) {
19365 GemmMicrokernelTester()
19366 .mr(2)
19367 .nr(4)
19368 .kr(2)
19369 .sr(1)
19370 .m(2)
19371 .n(4)
19372 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019373 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019374 }
19375 }
19376
Marat Dukhan801d2c22021-06-02 21:25:05 -070019377 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019378 TEST_REQUIRES_X86_SSE2;
19379 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019380 for (uint32_t n = 1; n <= 4; n++) {
19381 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019382 GemmMicrokernelTester()
19383 .mr(2)
19384 .nr(4)
19385 .kr(2)
19386 .sr(1)
19387 .m(m)
19388 .n(n)
19389 .k(k)
19390 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019391 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019392 }
19393 }
19394 }
19395 }
19396
Marat Dukhan801d2c22021-06-02 21:25:05 -070019397 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019398 TEST_REQUIRES_X86_SSE2;
19399 for (uint32_t n = 5; n < 8; n++) {
19400 for (size_t k = 1; k <= 40; k += 9) {
19401 GemmMicrokernelTester()
19402 .mr(2)
19403 .nr(4)
19404 .kr(2)
19405 .sr(1)
19406 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019407 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070019408 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019409 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019410 }
19411 }
19412 }
19413
Marat Dukhan801d2c22021-06-02 21:25:05 -070019414 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019415 TEST_REQUIRES_X86_SSE2;
19416 for (uint32_t n = 5; n < 8; n++) {
19417 for (size_t k = 1; k <= 40; k += 9) {
19418 GemmMicrokernelTester()
19419 .mr(2)
19420 .nr(4)
19421 .kr(2)
19422 .sr(1)
19423 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019424 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070019425 .k(k)
19426 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080019427 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019428 }
19429 }
19430 }
19431
Marat Dukhan801d2c22021-06-02 21:25:05 -070019432 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019433 TEST_REQUIRES_X86_SSE2;
19434 for (uint32_t n = 5; n < 8; n++) {
19435 for (size_t k = 1; k <= 40; k += 9) {
19436 for (uint32_t m = 1; m <= 2; m++) {
19437 GemmMicrokernelTester()
19438 .mr(2)
19439 .nr(4)
19440 .kr(2)
19441 .sr(1)
19442 .m(m)
19443 .n(n)
19444 .k(k)
19445 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019446 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019447 }
19448 }
19449 }
19450 }
19451
Marat Dukhan801d2c22021-06-02 21:25:05 -070019452 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019453 TEST_REQUIRES_X86_SSE2;
19454 for (uint32_t n = 8; n <= 12; n += 4) {
19455 for (size_t k = 1; k <= 40; k += 9) {
19456 GemmMicrokernelTester()
19457 .mr(2)
19458 .nr(4)
19459 .kr(2)
19460 .sr(1)
19461 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019462 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070019463 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019464 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019465 }
19466 }
19467 }
19468
Marat Dukhan801d2c22021-06-02 21:25:05 -070019469 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019470 TEST_REQUIRES_X86_SSE2;
19471 for (uint32_t n = 8; n <= 12; n += 4) {
19472 for (size_t k = 1; k <= 40; k += 9) {
19473 GemmMicrokernelTester()
19474 .mr(2)
19475 .nr(4)
19476 .kr(2)
19477 .sr(1)
19478 .m(2)
19479 .n(n)
19480 .k(k)
19481 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080019482 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019483 }
19484 }
19485 }
19486
Marat Dukhan801d2c22021-06-02 21:25:05 -070019487 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019488 TEST_REQUIRES_X86_SSE2;
19489 for (uint32_t n = 8; n <= 12; n += 4) {
19490 for (size_t k = 1; k <= 40; k += 9) {
19491 for (uint32_t m = 1; m <= 2; m++) {
19492 GemmMicrokernelTester()
19493 .mr(2)
19494 .nr(4)
19495 .kr(2)
19496 .sr(1)
19497 .m(m)
19498 .n(n)
19499 .k(k)
19500 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019501 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019502 }
19503 }
19504 }
19505 }
19506
Marat Dukhan801d2c22021-06-02 21:25:05 -070019507 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, small_kernel) {
19508 TEST_REQUIRES_X86_SSE2;
19509 for (size_t k = 1; k <= 40; k += 9) {
19510 GemmMicrokernelTester()
19511 .mr(2)
19512 .nr(4)
19513 .kr(2)
19514 .sr(1)
19515 .m(2)
19516 .n(4)
19517 .k(k)
19518 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080019519 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070019520 }
19521 }
19522
19523 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, small_kernel_subtile) {
19524 TEST_REQUIRES_X86_SSE2;
19525 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019526 for (uint32_t n = 1; n <= 4; n++) {
19527 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070019528 GemmMicrokernelTester()
19529 .mr(2)
19530 .nr(4)
19531 .kr(2)
19532 .sr(1)
19533 .m(m)
19534 .n(n)
19535 .k(k)
19536 .ks(3)
19537 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019538 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070019539 }
19540 }
19541 }
19542 }
19543
19544 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_small_kernel) {
19545 TEST_REQUIRES_X86_SSE2;
19546 for (uint32_t n = 5; n < 8; n++) {
19547 for (size_t k = 1; k <= 40; k += 9) {
19548 GemmMicrokernelTester()
19549 .mr(2)
19550 .nr(4)
19551 .kr(2)
19552 .sr(1)
19553 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019554 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070019555 .k(k)
19556 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080019557 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070019558 }
19559 }
19560 }
19561
19562 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_small_kernel) {
19563 TEST_REQUIRES_X86_SSE2;
19564 for (uint32_t n = 8; n <= 12; n += 4) {
19565 for (size_t k = 1; k <= 40; k += 9) {
19566 GemmMicrokernelTester()
19567 .mr(2)
19568 .nr(4)
19569 .kr(2)
19570 .sr(1)
19571 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019572 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070019573 .k(k)
19574 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080019575 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070019576 }
19577 }
19578 }
19579
19580 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019581 TEST_REQUIRES_X86_SSE2;
19582 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019583 for (uint32_t n = 1; n <= 4; n++) {
19584 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019585 GemmMicrokernelTester()
19586 .mr(2)
19587 .nr(4)
19588 .kr(2)
19589 .sr(1)
19590 .m(m)
19591 .n(n)
19592 .k(k)
19593 .cm_stride(7)
19594 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019595 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019596 }
19597 }
19598 }
19599 }
19600
Marat Dukhan801d2c22021-06-02 21:25:05 -070019601 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, a_offset) {
19602 TEST_REQUIRES_X86_SSE2;
19603 for (size_t k = 1; k <= 40; k += 9) {
19604 GemmMicrokernelTester()
19605 .mr(2)
19606 .nr(4)
19607 .kr(2)
19608 .sr(1)
19609 .m(2)
19610 .n(4)
19611 .k(k)
19612 .ks(3)
19613 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080019614 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070019615 }
19616 }
19617
19618 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, zero) {
19619 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019620 for (size_t k = 1; k <= 40; k += 9) {
19621 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070019622 GemmMicrokernelTester()
19623 .mr(2)
19624 .nr(4)
19625 .kr(2)
19626 .sr(1)
19627 .m(2)
19628 .n(4)
19629 .k(k)
19630 .ks(3)
19631 .a_offset(83)
19632 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080019633 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070019634 }
19635 }
19636 }
19637
19638 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019639 TEST_REQUIRES_X86_SSE2;
19640 GemmMicrokernelTester()
19641 .mr(2)
19642 .nr(4)
19643 .kr(2)
19644 .sr(1)
19645 .m(2)
19646 .n(4)
19647 .k(8)
19648 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019649 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019650 }
19651
Marat Dukhan801d2c22021-06-02 21:25:05 -070019652 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019653 TEST_REQUIRES_X86_SSE2;
19654 GemmMicrokernelTester()
19655 .mr(2)
19656 .nr(4)
19657 .kr(2)
19658 .sr(1)
19659 .m(2)
19660 .n(4)
19661 .k(8)
19662 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019663 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019664 }
19665
Marat Dukhan801d2c22021-06-02 21:25:05 -070019666 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019667 TEST_REQUIRES_X86_SSE2;
19668 GemmMicrokernelTester()
19669 .mr(2)
19670 .nr(4)
19671 .kr(2)
19672 .sr(1)
19673 .m(2)
19674 .n(4)
19675 .k(8)
19676 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080019677 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019678 }
19679#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19680
19681
19682#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070019683 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019684 TEST_REQUIRES_X86_SSE2;
19685 GemmMicrokernelTester()
19686 .mr(4)
19687 .nr(4)
19688 .kr(2)
19689 .sr(1)
19690 .m(4)
19691 .n(4)
19692 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080019693 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019694 }
19695
Marat Dukhan801d2c22021-06-02 21:25:05 -070019696 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019697 TEST_REQUIRES_X86_SSE2;
19698 GemmMicrokernelTester()
19699 .mr(4)
19700 .nr(4)
19701 .kr(2)
19702 .sr(1)
19703 .m(4)
19704 .n(4)
19705 .k(8)
19706 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080019707 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019708 }
19709
Marat Dukhan801d2c22021-06-02 21:25:05 -070019710 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019711 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019712 for (uint32_t n = 1; n <= 4; n++) {
19713 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019714 GemmMicrokernelTester()
19715 .mr(4)
19716 .nr(4)
19717 .kr(2)
19718 .sr(1)
19719 .m(m)
19720 .n(n)
19721 .k(8)
19722 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019723 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019724 }
19725 }
19726 }
19727
Marat Dukhan801d2c22021-06-02 21:25:05 -070019728 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019729 TEST_REQUIRES_X86_SSE2;
19730 for (uint32_t m = 1; m <= 4; m++) {
19731 GemmMicrokernelTester()
19732 .mr(4)
19733 .nr(4)
19734 .kr(2)
19735 .sr(1)
19736 .m(m)
19737 .n(4)
19738 .k(8)
19739 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019740 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019741 }
19742 }
19743
Marat Dukhan801d2c22021-06-02 21:25:05 -070019744 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019745 TEST_REQUIRES_X86_SSE2;
19746 for (uint32_t n = 1; n <= 4; n++) {
19747 GemmMicrokernelTester()
19748 .mr(4)
19749 .nr(4)
19750 .kr(2)
19751 .sr(1)
19752 .m(4)
19753 .n(n)
19754 .k(8)
19755 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019756 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019757 }
19758 }
19759
Marat Dukhan801d2c22021-06-02 21:25:05 -070019760 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019761 TEST_REQUIRES_X86_SSE2;
19762 for (size_t k = 1; k < 8; k++) {
19763 GemmMicrokernelTester()
19764 .mr(4)
19765 .nr(4)
19766 .kr(2)
19767 .sr(1)
19768 .m(4)
19769 .n(4)
19770 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019771 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019772 }
19773 }
19774
Marat Dukhan801d2c22021-06-02 21:25:05 -070019775 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019776 TEST_REQUIRES_X86_SSE2;
19777 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019778 for (uint32_t n = 1; n <= 4; n++) {
19779 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019780 GemmMicrokernelTester()
19781 .mr(4)
19782 .nr(4)
19783 .kr(2)
19784 .sr(1)
19785 .m(m)
19786 .n(n)
19787 .k(k)
19788 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019789 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019790 }
19791 }
19792 }
19793 }
19794
Marat Dukhan801d2c22021-06-02 21:25:05 -070019795 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019796 TEST_REQUIRES_X86_SSE2;
19797 for (size_t k = 9; k < 16; k++) {
19798 GemmMicrokernelTester()
19799 .mr(4)
19800 .nr(4)
19801 .kr(2)
19802 .sr(1)
19803 .m(4)
19804 .n(4)
19805 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019806 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019807 }
19808 }
19809
Marat Dukhan801d2c22021-06-02 21:25:05 -070019810 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019811 TEST_REQUIRES_X86_SSE2;
19812 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019813 for (uint32_t n = 1; n <= 4; n++) {
19814 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019815 GemmMicrokernelTester()
19816 .mr(4)
19817 .nr(4)
19818 .kr(2)
19819 .sr(1)
19820 .m(m)
19821 .n(n)
19822 .k(k)
19823 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019824 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019825 }
19826 }
19827 }
19828 }
19829
Marat Dukhan801d2c22021-06-02 21:25:05 -070019830 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019831 TEST_REQUIRES_X86_SSE2;
19832 for (size_t k = 16; k <= 80; k += 8) {
19833 GemmMicrokernelTester()
19834 .mr(4)
19835 .nr(4)
19836 .kr(2)
19837 .sr(1)
19838 .m(4)
19839 .n(4)
19840 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019841 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019842 }
19843 }
19844
Marat Dukhan801d2c22021-06-02 21:25:05 -070019845 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019846 TEST_REQUIRES_X86_SSE2;
19847 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019848 for (uint32_t n = 1; n <= 4; n++) {
19849 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019850 GemmMicrokernelTester()
19851 .mr(4)
19852 .nr(4)
19853 .kr(2)
19854 .sr(1)
19855 .m(m)
19856 .n(n)
19857 .k(k)
19858 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019859 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019860 }
19861 }
19862 }
19863 }
19864
Marat Dukhan801d2c22021-06-02 21:25:05 -070019865 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019866 TEST_REQUIRES_X86_SSE2;
19867 for (uint32_t n = 5; n < 8; n++) {
19868 for (size_t k = 1; k <= 40; k += 9) {
19869 GemmMicrokernelTester()
19870 .mr(4)
19871 .nr(4)
19872 .kr(2)
19873 .sr(1)
19874 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019875 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070019876 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019877 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019878 }
19879 }
19880 }
19881
Marat Dukhan801d2c22021-06-02 21:25:05 -070019882 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019883 TEST_REQUIRES_X86_SSE2;
19884 for (uint32_t n = 5; n < 8; n++) {
19885 for (size_t k = 1; k <= 40; k += 9) {
19886 GemmMicrokernelTester()
19887 .mr(4)
19888 .nr(4)
19889 .kr(2)
19890 .sr(1)
19891 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019892 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070019893 .k(k)
19894 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080019895 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019896 }
19897 }
19898 }
19899
Marat Dukhan801d2c22021-06-02 21:25:05 -070019900 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019901 TEST_REQUIRES_X86_SSE2;
19902 for (uint32_t n = 5; n < 8; n++) {
19903 for (size_t k = 1; k <= 40; k += 9) {
19904 for (uint32_t m = 1; m <= 4; m++) {
19905 GemmMicrokernelTester()
19906 .mr(4)
19907 .nr(4)
19908 .kr(2)
19909 .sr(1)
19910 .m(m)
19911 .n(n)
19912 .k(k)
19913 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019914 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019915 }
19916 }
19917 }
19918 }
19919
Marat Dukhan801d2c22021-06-02 21:25:05 -070019920 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019921 TEST_REQUIRES_X86_SSE2;
19922 for (uint32_t n = 8; n <= 12; n += 4) {
19923 for (size_t k = 1; k <= 40; k += 9) {
19924 GemmMicrokernelTester()
19925 .mr(4)
19926 .nr(4)
19927 .kr(2)
19928 .sr(1)
19929 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019930 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070019931 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019932 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019933 }
19934 }
19935 }
19936
Marat Dukhan801d2c22021-06-02 21:25:05 -070019937 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019938 TEST_REQUIRES_X86_SSE2;
19939 for (uint32_t n = 8; n <= 12; n += 4) {
19940 for (size_t k = 1; k <= 40; k += 9) {
19941 GemmMicrokernelTester()
19942 .mr(4)
19943 .nr(4)
19944 .kr(2)
19945 .sr(1)
19946 .m(4)
19947 .n(n)
19948 .k(k)
19949 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080019950 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019951 }
19952 }
19953 }
19954
Marat Dukhan801d2c22021-06-02 21:25:05 -070019955 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019956 TEST_REQUIRES_X86_SSE2;
19957 for (uint32_t n = 8; n <= 12; n += 4) {
19958 for (size_t k = 1; k <= 40; k += 9) {
19959 for (uint32_t m = 1; m <= 4; m++) {
19960 GemmMicrokernelTester()
19961 .mr(4)
19962 .nr(4)
19963 .kr(2)
19964 .sr(1)
19965 .m(m)
19966 .n(n)
19967 .k(k)
19968 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019969 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019970 }
19971 }
19972 }
19973 }
19974
Marat Dukhan801d2c22021-06-02 21:25:05 -070019975 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, small_kernel) {
19976 TEST_REQUIRES_X86_SSE2;
19977 for (size_t k = 1; k <= 40; k += 9) {
19978 GemmMicrokernelTester()
19979 .mr(4)
19980 .nr(4)
19981 .kr(2)
19982 .sr(1)
19983 .m(4)
19984 .n(4)
19985 .k(k)
19986 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080019987 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070019988 }
19989 }
19990
19991 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, small_kernel_subtile) {
19992 TEST_REQUIRES_X86_SSE2;
19993 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019994 for (uint32_t n = 1; n <= 4; n++) {
19995 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070019996 GemmMicrokernelTester()
19997 .mr(4)
19998 .nr(4)
19999 .kr(2)
20000 .sr(1)
20001 .m(m)
20002 .n(n)
20003 .k(k)
20004 .ks(3)
20005 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020006 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070020007 }
20008 }
20009 }
20010 }
20011
20012 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_small_kernel) {
20013 TEST_REQUIRES_X86_SSE2;
20014 for (uint32_t n = 5; n < 8; n++) {
20015 for (size_t k = 1; k <= 40; k += 9) {
20016 GemmMicrokernelTester()
20017 .mr(4)
20018 .nr(4)
20019 .kr(2)
20020 .sr(1)
20021 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020022 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070020023 .k(k)
20024 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080020025 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070020026 }
20027 }
20028 }
20029
20030 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_small_kernel) {
20031 TEST_REQUIRES_X86_SSE2;
20032 for (uint32_t n = 8; n <= 12; n += 4) {
20033 for (size_t k = 1; k <= 40; k += 9) {
20034 GemmMicrokernelTester()
20035 .mr(4)
20036 .nr(4)
20037 .kr(2)
20038 .sr(1)
20039 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020040 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070020041 .k(k)
20042 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080020043 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070020044 }
20045 }
20046 }
20047
20048 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020049 TEST_REQUIRES_X86_SSE2;
20050 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020051 for (uint32_t n = 1; n <= 4; n++) {
20052 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020053 GemmMicrokernelTester()
20054 .mr(4)
20055 .nr(4)
20056 .kr(2)
20057 .sr(1)
20058 .m(m)
20059 .n(n)
20060 .k(k)
20061 .cm_stride(7)
20062 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020063 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020064 }
20065 }
20066 }
20067 }
20068
Marat Dukhan801d2c22021-06-02 21:25:05 -070020069 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, a_offset) {
20070 TEST_REQUIRES_X86_SSE2;
20071 for (size_t k = 1; k <= 40; k += 9) {
20072 GemmMicrokernelTester()
20073 .mr(4)
20074 .nr(4)
20075 .kr(2)
20076 .sr(1)
20077 .m(4)
20078 .n(4)
20079 .k(k)
20080 .ks(3)
20081 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080020082 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070020083 }
20084 }
20085
20086 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, zero) {
20087 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020088 for (size_t k = 1; k <= 40; k += 9) {
20089 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070020090 GemmMicrokernelTester()
20091 .mr(4)
20092 .nr(4)
20093 .kr(2)
20094 .sr(1)
20095 .m(4)
20096 .n(4)
20097 .k(k)
20098 .ks(3)
20099 .a_offset(163)
20100 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080020101 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070020102 }
20103 }
20104 }
20105
20106 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020107 TEST_REQUIRES_X86_SSE2;
20108 GemmMicrokernelTester()
20109 .mr(4)
20110 .nr(4)
20111 .kr(2)
20112 .sr(1)
20113 .m(4)
20114 .n(4)
20115 .k(8)
20116 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020117 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020118 }
20119
Marat Dukhan801d2c22021-06-02 21:25:05 -070020120 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020121 TEST_REQUIRES_X86_SSE2;
20122 GemmMicrokernelTester()
20123 .mr(4)
20124 .nr(4)
20125 .kr(2)
20126 .sr(1)
20127 .m(4)
20128 .n(4)
20129 .k(8)
20130 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020131 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020132 }
20133
Marat Dukhan801d2c22021-06-02 21:25:05 -070020134 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020135 TEST_REQUIRES_X86_SSE2;
20136 GemmMicrokernelTester()
20137 .mr(4)
20138 .nr(4)
20139 .kr(2)
20140 .sr(1)
20141 .m(4)
20142 .n(4)
20143 .k(8)
20144 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080020145 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020146 }
20147#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20148
20149
20150#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070020151 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020152 TEST_REQUIRES_X86_SSE41;
20153 GemmMicrokernelTester()
20154 .mr(1)
20155 .nr(4)
20156 .kr(2)
20157 .sr(1)
20158 .m(1)
20159 .n(4)
20160 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080020161 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020162 }
20163
Marat Dukhan801d2c22021-06-02 21:25:05 -070020164 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020165 TEST_REQUIRES_X86_SSE41;
20166 GemmMicrokernelTester()
20167 .mr(1)
20168 .nr(4)
20169 .kr(2)
20170 .sr(1)
20171 .m(1)
20172 .n(4)
20173 .k(8)
20174 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080020175 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020176 }
20177
Marat Dukhan801d2c22021-06-02 21:25:05 -070020178 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020179 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020180 for (uint32_t n = 1; n <= 4; n++) {
20181 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020182 GemmMicrokernelTester()
20183 .mr(1)
20184 .nr(4)
20185 .kr(2)
20186 .sr(1)
20187 .m(m)
20188 .n(n)
20189 .k(8)
20190 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020191 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020192 }
20193 }
20194 }
20195
Marat Dukhan801d2c22021-06-02 21:25:05 -070020196 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020197 TEST_REQUIRES_X86_SSE41;
20198 for (uint32_t m = 1; m <= 1; m++) {
20199 GemmMicrokernelTester()
20200 .mr(1)
20201 .nr(4)
20202 .kr(2)
20203 .sr(1)
20204 .m(m)
20205 .n(4)
20206 .k(8)
20207 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020208 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020209 }
20210 }
20211
Marat Dukhan801d2c22021-06-02 21:25:05 -070020212 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020213 TEST_REQUIRES_X86_SSE41;
20214 for (uint32_t n = 1; n <= 4; n++) {
20215 GemmMicrokernelTester()
20216 .mr(1)
20217 .nr(4)
20218 .kr(2)
20219 .sr(1)
20220 .m(1)
20221 .n(n)
20222 .k(8)
20223 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020224 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020225 }
20226 }
20227
Marat Dukhan801d2c22021-06-02 21:25:05 -070020228 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020229 TEST_REQUIRES_X86_SSE41;
20230 for (size_t k = 1; k < 8; k++) {
20231 GemmMicrokernelTester()
20232 .mr(1)
20233 .nr(4)
20234 .kr(2)
20235 .sr(1)
20236 .m(1)
20237 .n(4)
20238 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020239 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020240 }
20241 }
20242
Marat Dukhan801d2c22021-06-02 21:25:05 -070020243 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020244 TEST_REQUIRES_X86_SSE41;
20245 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020246 for (uint32_t n = 1; n <= 4; n++) {
20247 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020248 GemmMicrokernelTester()
20249 .mr(1)
20250 .nr(4)
20251 .kr(2)
20252 .sr(1)
20253 .m(m)
20254 .n(n)
20255 .k(k)
20256 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020257 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020258 }
20259 }
20260 }
20261 }
20262
Marat Dukhan801d2c22021-06-02 21:25:05 -070020263 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020264 TEST_REQUIRES_X86_SSE41;
20265 for (size_t k = 9; k < 16; k++) {
20266 GemmMicrokernelTester()
20267 .mr(1)
20268 .nr(4)
20269 .kr(2)
20270 .sr(1)
20271 .m(1)
20272 .n(4)
20273 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020274 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020275 }
20276 }
20277
Marat Dukhan801d2c22021-06-02 21:25:05 -070020278 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020279 TEST_REQUIRES_X86_SSE41;
20280 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020281 for (uint32_t n = 1; n <= 4; n++) {
20282 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020283 GemmMicrokernelTester()
20284 .mr(1)
20285 .nr(4)
20286 .kr(2)
20287 .sr(1)
20288 .m(m)
20289 .n(n)
20290 .k(k)
20291 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020292 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020293 }
20294 }
20295 }
20296 }
20297
Marat Dukhan801d2c22021-06-02 21:25:05 -070020298 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020299 TEST_REQUIRES_X86_SSE41;
20300 for (size_t k = 16; k <= 80; k += 8) {
20301 GemmMicrokernelTester()
20302 .mr(1)
20303 .nr(4)
20304 .kr(2)
20305 .sr(1)
20306 .m(1)
20307 .n(4)
20308 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020309 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020310 }
20311 }
20312
Marat Dukhan801d2c22021-06-02 21:25:05 -070020313 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020314 TEST_REQUIRES_X86_SSE41;
20315 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020316 for (uint32_t n = 1; n <= 4; n++) {
20317 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020318 GemmMicrokernelTester()
20319 .mr(1)
20320 .nr(4)
20321 .kr(2)
20322 .sr(1)
20323 .m(m)
20324 .n(n)
20325 .k(k)
20326 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020327 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020328 }
20329 }
20330 }
20331 }
20332
Marat Dukhan801d2c22021-06-02 21:25:05 -070020333 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020334 TEST_REQUIRES_X86_SSE41;
20335 for (uint32_t n = 5; n < 8; n++) {
20336 for (size_t k = 1; k <= 40; k += 9) {
20337 GemmMicrokernelTester()
20338 .mr(1)
20339 .nr(4)
20340 .kr(2)
20341 .sr(1)
20342 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020343 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070020344 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020345 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020346 }
20347 }
20348 }
20349
Marat Dukhan801d2c22021-06-02 21:25:05 -070020350 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020351 TEST_REQUIRES_X86_SSE41;
20352 for (uint32_t n = 5; n < 8; n++) {
20353 for (size_t k = 1; k <= 40; k += 9) {
20354 GemmMicrokernelTester()
20355 .mr(1)
20356 .nr(4)
20357 .kr(2)
20358 .sr(1)
20359 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020360 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070020361 .k(k)
20362 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080020363 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020364 }
20365 }
20366 }
20367
Marat Dukhan801d2c22021-06-02 21:25:05 -070020368 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020369 TEST_REQUIRES_X86_SSE41;
20370 for (uint32_t n = 5; n < 8; n++) {
20371 for (size_t k = 1; k <= 40; k += 9) {
20372 for (uint32_t m = 1; m <= 1; m++) {
20373 GemmMicrokernelTester()
20374 .mr(1)
20375 .nr(4)
20376 .kr(2)
20377 .sr(1)
20378 .m(m)
20379 .n(n)
20380 .k(k)
20381 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020382 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020383 }
20384 }
20385 }
20386 }
20387
Marat Dukhan801d2c22021-06-02 21:25:05 -070020388 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020389 TEST_REQUIRES_X86_SSE41;
20390 for (uint32_t n = 8; n <= 12; n += 4) {
20391 for (size_t k = 1; k <= 40; k += 9) {
20392 GemmMicrokernelTester()
20393 .mr(1)
20394 .nr(4)
20395 .kr(2)
20396 .sr(1)
20397 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020398 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070020399 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020400 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020401 }
20402 }
20403 }
20404
Marat Dukhan801d2c22021-06-02 21:25:05 -070020405 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020406 TEST_REQUIRES_X86_SSE41;
20407 for (uint32_t n = 8; n <= 12; n += 4) {
20408 for (size_t k = 1; k <= 40; k += 9) {
20409 GemmMicrokernelTester()
20410 .mr(1)
20411 .nr(4)
20412 .kr(2)
20413 .sr(1)
20414 .m(1)
20415 .n(n)
20416 .k(k)
20417 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080020418 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020419 }
20420 }
20421 }
20422
Marat Dukhan801d2c22021-06-02 21:25:05 -070020423 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020424 TEST_REQUIRES_X86_SSE41;
20425 for (uint32_t n = 8; n <= 12; n += 4) {
20426 for (size_t k = 1; k <= 40; k += 9) {
20427 for (uint32_t m = 1; m <= 1; m++) {
20428 GemmMicrokernelTester()
20429 .mr(1)
20430 .nr(4)
20431 .kr(2)
20432 .sr(1)
20433 .m(m)
20434 .n(n)
20435 .k(k)
20436 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020437 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020438 }
20439 }
20440 }
20441 }
20442
Marat Dukhan801d2c22021-06-02 21:25:05 -070020443 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, small_kernel) {
20444 TEST_REQUIRES_X86_SSE41;
20445 for (size_t k = 1; k <= 40; k += 9) {
20446 GemmMicrokernelTester()
20447 .mr(1)
20448 .nr(4)
20449 .kr(2)
20450 .sr(1)
20451 .m(1)
20452 .n(4)
20453 .k(k)
20454 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080020455 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070020456 }
20457 }
20458
20459 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, small_kernel_subtile) {
20460 TEST_REQUIRES_X86_SSE41;
20461 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020462 for (uint32_t n = 1; n <= 4; n++) {
20463 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070020464 GemmMicrokernelTester()
20465 .mr(1)
20466 .nr(4)
20467 .kr(2)
20468 .sr(1)
20469 .m(m)
20470 .n(n)
20471 .k(k)
20472 .ks(3)
20473 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020474 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070020475 }
20476 }
20477 }
20478 }
20479
20480 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4_small_kernel) {
20481 TEST_REQUIRES_X86_SSE41;
20482 for (uint32_t n = 5; n < 8; n++) {
20483 for (size_t k = 1; k <= 40; k += 9) {
20484 GemmMicrokernelTester()
20485 .mr(1)
20486 .nr(4)
20487 .kr(2)
20488 .sr(1)
20489 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020490 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070020491 .k(k)
20492 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080020493 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070020494 }
20495 }
20496 }
20497
20498 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4_small_kernel) {
20499 TEST_REQUIRES_X86_SSE41;
20500 for (uint32_t n = 8; n <= 12; n += 4) {
20501 for (size_t k = 1; k <= 40; k += 9) {
20502 GemmMicrokernelTester()
20503 .mr(1)
20504 .nr(4)
20505 .kr(2)
20506 .sr(1)
20507 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020508 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070020509 .k(k)
20510 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080020511 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070020512 }
20513 }
20514 }
20515
20516 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020517 TEST_REQUIRES_X86_SSE41;
20518 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020519 for (uint32_t n = 1; n <= 4; n++) {
20520 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020521 GemmMicrokernelTester()
20522 .mr(1)
20523 .nr(4)
20524 .kr(2)
20525 .sr(1)
20526 .m(m)
20527 .n(n)
20528 .k(k)
20529 .cm_stride(7)
20530 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020531 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020532 }
20533 }
20534 }
20535 }
20536
Marat Dukhan801d2c22021-06-02 21:25:05 -070020537 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, a_offset) {
20538 TEST_REQUIRES_X86_SSE41;
20539 for (size_t k = 1; k <= 40; k += 9) {
20540 GemmMicrokernelTester()
20541 .mr(1)
20542 .nr(4)
20543 .kr(2)
20544 .sr(1)
20545 .m(1)
20546 .n(4)
20547 .k(k)
20548 .ks(3)
20549 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080020550 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070020551 }
20552 }
20553
20554 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, zero) {
20555 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020556 for (size_t k = 1; k <= 40; k += 9) {
20557 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070020558 GemmMicrokernelTester()
20559 .mr(1)
20560 .nr(4)
20561 .kr(2)
20562 .sr(1)
20563 .m(1)
20564 .n(4)
20565 .k(k)
20566 .ks(3)
20567 .a_offset(43)
20568 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080020569 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070020570 }
20571 }
20572 }
20573
20574 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020575 TEST_REQUIRES_X86_SSE41;
20576 GemmMicrokernelTester()
20577 .mr(1)
20578 .nr(4)
20579 .kr(2)
20580 .sr(1)
20581 .m(1)
20582 .n(4)
20583 .k(8)
20584 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020585 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020586 }
20587
Marat Dukhan801d2c22021-06-02 21:25:05 -070020588 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020589 TEST_REQUIRES_X86_SSE41;
20590 GemmMicrokernelTester()
20591 .mr(1)
20592 .nr(4)
20593 .kr(2)
20594 .sr(1)
20595 .m(1)
20596 .n(4)
20597 .k(8)
20598 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020599 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020600 }
20601
Marat Dukhan801d2c22021-06-02 21:25:05 -070020602 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020603 TEST_REQUIRES_X86_SSE41;
20604 GemmMicrokernelTester()
20605 .mr(1)
20606 .nr(4)
20607 .kr(2)
20608 .sr(1)
20609 .m(1)
20610 .n(4)
20611 .k(8)
20612 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080020613 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020614 }
20615#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20616
20617
20618#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070020619 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020620 TEST_REQUIRES_X86_SSE41;
20621 GemmMicrokernelTester()
20622 .mr(2)
20623 .nr(4)
20624 .kr(2)
20625 .sr(1)
20626 .m(2)
20627 .n(4)
20628 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080020629 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020630 }
20631
Marat Dukhan801d2c22021-06-02 21:25:05 -070020632 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020633 TEST_REQUIRES_X86_SSE41;
20634 GemmMicrokernelTester()
20635 .mr(2)
20636 .nr(4)
20637 .kr(2)
20638 .sr(1)
20639 .m(2)
20640 .n(4)
20641 .k(8)
20642 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080020643 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020644 }
20645
Marat Dukhan801d2c22021-06-02 21:25:05 -070020646 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020647 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020648 for (uint32_t n = 1; n <= 4; n++) {
20649 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020650 GemmMicrokernelTester()
20651 .mr(2)
20652 .nr(4)
20653 .kr(2)
20654 .sr(1)
20655 .m(m)
20656 .n(n)
20657 .k(8)
20658 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020659 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020660 }
20661 }
20662 }
20663
Marat Dukhan801d2c22021-06-02 21:25:05 -070020664 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020665 TEST_REQUIRES_X86_SSE41;
20666 for (uint32_t m = 1; m <= 2; m++) {
20667 GemmMicrokernelTester()
20668 .mr(2)
20669 .nr(4)
20670 .kr(2)
20671 .sr(1)
20672 .m(m)
20673 .n(4)
20674 .k(8)
20675 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020676 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020677 }
20678 }
20679
Marat Dukhan801d2c22021-06-02 21:25:05 -070020680 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020681 TEST_REQUIRES_X86_SSE41;
20682 for (uint32_t n = 1; n <= 4; n++) {
20683 GemmMicrokernelTester()
20684 .mr(2)
20685 .nr(4)
20686 .kr(2)
20687 .sr(1)
20688 .m(2)
20689 .n(n)
20690 .k(8)
20691 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020692 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020693 }
20694 }
20695
Marat Dukhan801d2c22021-06-02 21:25:05 -070020696 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020697 TEST_REQUIRES_X86_SSE41;
20698 for (size_t k = 1; k < 8; k++) {
20699 GemmMicrokernelTester()
20700 .mr(2)
20701 .nr(4)
20702 .kr(2)
20703 .sr(1)
20704 .m(2)
20705 .n(4)
20706 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020707 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020708 }
20709 }
20710
Marat Dukhan801d2c22021-06-02 21:25:05 -070020711 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020712 TEST_REQUIRES_X86_SSE41;
20713 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020714 for (uint32_t n = 1; n <= 4; n++) {
20715 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020716 GemmMicrokernelTester()
20717 .mr(2)
20718 .nr(4)
20719 .kr(2)
20720 .sr(1)
20721 .m(m)
20722 .n(n)
20723 .k(k)
20724 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020725 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020726 }
20727 }
20728 }
20729 }
20730
Marat Dukhan801d2c22021-06-02 21:25:05 -070020731 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020732 TEST_REQUIRES_X86_SSE41;
20733 for (size_t k = 9; k < 16; k++) {
20734 GemmMicrokernelTester()
20735 .mr(2)
20736 .nr(4)
20737 .kr(2)
20738 .sr(1)
20739 .m(2)
20740 .n(4)
20741 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020742 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020743 }
20744 }
20745
Marat Dukhan801d2c22021-06-02 21:25:05 -070020746 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020747 TEST_REQUIRES_X86_SSE41;
20748 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020749 for (uint32_t n = 1; n <= 4; n++) {
20750 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020751 GemmMicrokernelTester()
20752 .mr(2)
20753 .nr(4)
20754 .kr(2)
20755 .sr(1)
20756 .m(m)
20757 .n(n)
20758 .k(k)
20759 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020760 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020761 }
20762 }
20763 }
20764 }
20765
Marat Dukhan801d2c22021-06-02 21:25:05 -070020766 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020767 TEST_REQUIRES_X86_SSE41;
20768 for (size_t k = 16; k <= 80; k += 8) {
20769 GemmMicrokernelTester()
20770 .mr(2)
20771 .nr(4)
20772 .kr(2)
20773 .sr(1)
20774 .m(2)
20775 .n(4)
20776 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020777 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020778 }
20779 }
20780
Marat Dukhan801d2c22021-06-02 21:25:05 -070020781 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020782 TEST_REQUIRES_X86_SSE41;
20783 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020784 for (uint32_t n = 1; n <= 4; n++) {
20785 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020786 GemmMicrokernelTester()
20787 .mr(2)
20788 .nr(4)
20789 .kr(2)
20790 .sr(1)
20791 .m(m)
20792 .n(n)
20793 .k(k)
20794 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020795 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020796 }
20797 }
20798 }
20799 }
20800
Marat Dukhan801d2c22021-06-02 21:25:05 -070020801 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020802 TEST_REQUIRES_X86_SSE41;
20803 for (uint32_t n = 5; n < 8; n++) {
20804 for (size_t k = 1; k <= 40; k += 9) {
20805 GemmMicrokernelTester()
20806 .mr(2)
20807 .nr(4)
20808 .kr(2)
20809 .sr(1)
20810 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020811 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070020812 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020813 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020814 }
20815 }
20816 }
20817
Marat Dukhan801d2c22021-06-02 21:25:05 -070020818 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020819 TEST_REQUIRES_X86_SSE41;
20820 for (uint32_t n = 5; n < 8; n++) {
20821 for (size_t k = 1; k <= 40; k += 9) {
20822 GemmMicrokernelTester()
20823 .mr(2)
20824 .nr(4)
20825 .kr(2)
20826 .sr(1)
20827 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020828 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070020829 .k(k)
20830 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080020831 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020832 }
20833 }
20834 }
20835
Marat Dukhan801d2c22021-06-02 21:25:05 -070020836 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020837 TEST_REQUIRES_X86_SSE41;
20838 for (uint32_t n = 5; n < 8; n++) {
20839 for (size_t k = 1; k <= 40; k += 9) {
20840 for (uint32_t m = 1; m <= 2; m++) {
20841 GemmMicrokernelTester()
20842 .mr(2)
20843 .nr(4)
20844 .kr(2)
20845 .sr(1)
20846 .m(m)
20847 .n(n)
20848 .k(k)
20849 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020850 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020851 }
20852 }
20853 }
20854 }
20855
Marat Dukhan801d2c22021-06-02 21:25:05 -070020856 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020857 TEST_REQUIRES_X86_SSE41;
20858 for (uint32_t n = 8; n <= 12; n += 4) {
20859 for (size_t k = 1; k <= 40; k += 9) {
20860 GemmMicrokernelTester()
20861 .mr(2)
20862 .nr(4)
20863 .kr(2)
20864 .sr(1)
20865 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020866 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070020867 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020868 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020869 }
20870 }
20871 }
20872
Marat Dukhan801d2c22021-06-02 21:25:05 -070020873 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020874 TEST_REQUIRES_X86_SSE41;
20875 for (uint32_t n = 8; n <= 12; n += 4) {
20876 for (size_t k = 1; k <= 40; k += 9) {
20877 GemmMicrokernelTester()
20878 .mr(2)
20879 .nr(4)
20880 .kr(2)
20881 .sr(1)
20882 .m(2)
20883 .n(n)
20884 .k(k)
20885 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080020886 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020887 }
20888 }
20889 }
20890
Marat Dukhan801d2c22021-06-02 21:25:05 -070020891 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020892 TEST_REQUIRES_X86_SSE41;
20893 for (uint32_t n = 8; n <= 12; n += 4) {
20894 for (size_t k = 1; k <= 40; k += 9) {
20895 for (uint32_t m = 1; m <= 2; m++) {
20896 GemmMicrokernelTester()
20897 .mr(2)
20898 .nr(4)
20899 .kr(2)
20900 .sr(1)
20901 .m(m)
20902 .n(n)
20903 .k(k)
20904 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020905 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020906 }
20907 }
20908 }
20909 }
20910
Marat Dukhan801d2c22021-06-02 21:25:05 -070020911 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, small_kernel) {
20912 TEST_REQUIRES_X86_SSE41;
20913 for (size_t k = 1; k <= 40; k += 9) {
20914 GemmMicrokernelTester()
20915 .mr(2)
20916 .nr(4)
20917 .kr(2)
20918 .sr(1)
20919 .m(2)
20920 .n(4)
20921 .k(k)
20922 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080020923 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070020924 }
20925 }
20926
20927 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, small_kernel_subtile) {
20928 TEST_REQUIRES_X86_SSE41;
20929 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020930 for (uint32_t n = 1; n <= 4; n++) {
20931 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070020932 GemmMicrokernelTester()
20933 .mr(2)
20934 .nr(4)
20935 .kr(2)
20936 .sr(1)
20937 .m(m)
20938 .n(n)
20939 .k(k)
20940 .ks(3)
20941 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020942 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070020943 }
20944 }
20945 }
20946 }
20947
20948 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4_small_kernel) {
20949 TEST_REQUIRES_X86_SSE41;
20950 for (uint32_t n = 5; n < 8; n++) {
20951 for (size_t k = 1; k <= 40; k += 9) {
20952 GemmMicrokernelTester()
20953 .mr(2)
20954 .nr(4)
20955 .kr(2)
20956 .sr(1)
20957 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020958 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070020959 .k(k)
20960 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080020961 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070020962 }
20963 }
20964 }
20965
20966 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4_small_kernel) {
20967 TEST_REQUIRES_X86_SSE41;
20968 for (uint32_t n = 8; n <= 12; n += 4) {
20969 for (size_t k = 1; k <= 40; k += 9) {
20970 GemmMicrokernelTester()
20971 .mr(2)
20972 .nr(4)
20973 .kr(2)
20974 .sr(1)
20975 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020976 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070020977 .k(k)
20978 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080020979 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070020980 }
20981 }
20982 }
20983
20984 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020985 TEST_REQUIRES_X86_SSE41;
20986 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020987 for (uint32_t n = 1; n <= 4; n++) {
20988 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020989 GemmMicrokernelTester()
20990 .mr(2)
20991 .nr(4)
20992 .kr(2)
20993 .sr(1)
20994 .m(m)
20995 .n(n)
20996 .k(k)
20997 .cm_stride(7)
20998 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020999 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021000 }
21001 }
21002 }
21003 }
21004
Marat Dukhan801d2c22021-06-02 21:25:05 -070021005 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, a_offset) {
21006 TEST_REQUIRES_X86_SSE41;
21007 for (size_t k = 1; k <= 40; k += 9) {
21008 GemmMicrokernelTester()
21009 .mr(2)
21010 .nr(4)
21011 .kr(2)
21012 .sr(1)
21013 .m(2)
21014 .n(4)
21015 .k(k)
21016 .ks(3)
21017 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080021018 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070021019 }
21020 }
21021
21022 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, zero) {
21023 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080021024 for (size_t k = 1; k <= 40; k += 9) {
21025 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070021026 GemmMicrokernelTester()
21027 .mr(2)
21028 .nr(4)
21029 .kr(2)
21030 .sr(1)
21031 .m(2)
21032 .n(4)
21033 .k(k)
21034 .ks(3)
21035 .a_offset(83)
21036 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080021037 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070021038 }
21039 }
21040 }
21041
21042 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021043 TEST_REQUIRES_X86_SSE41;
21044 GemmMicrokernelTester()
21045 .mr(2)
21046 .nr(4)
21047 .kr(2)
21048 .sr(1)
21049 .m(2)
21050 .n(4)
21051 .k(8)
21052 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021053 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021054 }
21055
Marat Dukhan801d2c22021-06-02 21:25:05 -070021056 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021057 TEST_REQUIRES_X86_SSE41;
21058 GemmMicrokernelTester()
21059 .mr(2)
21060 .nr(4)
21061 .kr(2)
21062 .sr(1)
21063 .m(2)
21064 .n(4)
21065 .k(8)
21066 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021067 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021068 }
21069
Marat Dukhan801d2c22021-06-02 21:25:05 -070021070 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021071 TEST_REQUIRES_X86_SSE41;
21072 GemmMicrokernelTester()
21073 .mr(2)
21074 .nr(4)
21075 .kr(2)
21076 .sr(1)
21077 .m(2)
21078 .n(4)
21079 .k(8)
21080 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021081 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021082 }
21083#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21084
21085
21086#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070021087 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021088 TEST_REQUIRES_X86_AVX;
21089 GemmMicrokernelTester()
21090 .mr(3)
21091 .nr(4)
21092 .kr(2)
21093 .sr(1)
21094 .m(3)
21095 .n(4)
21096 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080021097 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021098 }
21099
Marat Dukhan801d2c22021-06-02 21:25:05 -070021100 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021101 TEST_REQUIRES_X86_AVX;
21102 GemmMicrokernelTester()
21103 .mr(3)
21104 .nr(4)
21105 .kr(2)
21106 .sr(1)
21107 .m(3)
21108 .n(4)
21109 .k(8)
21110 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021111 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021112 }
21113
Marat Dukhan801d2c22021-06-02 21:25:05 -070021114 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021115 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080021116 for (uint32_t n = 1; n <= 4; n++) {
21117 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021118 GemmMicrokernelTester()
21119 .mr(3)
21120 .nr(4)
21121 .kr(2)
21122 .sr(1)
21123 .m(m)
21124 .n(n)
21125 .k(8)
21126 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021127 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021128 }
21129 }
21130 }
21131
Marat Dukhan801d2c22021-06-02 21:25:05 -070021132 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021133 TEST_REQUIRES_X86_AVX;
21134 for (uint32_t m = 1; m <= 3; m++) {
21135 GemmMicrokernelTester()
21136 .mr(3)
21137 .nr(4)
21138 .kr(2)
21139 .sr(1)
21140 .m(m)
21141 .n(4)
21142 .k(8)
21143 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021144 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021145 }
21146 }
21147
Marat Dukhan801d2c22021-06-02 21:25:05 -070021148 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021149 TEST_REQUIRES_X86_AVX;
21150 for (uint32_t n = 1; n <= 4; n++) {
21151 GemmMicrokernelTester()
21152 .mr(3)
21153 .nr(4)
21154 .kr(2)
21155 .sr(1)
21156 .m(3)
21157 .n(n)
21158 .k(8)
21159 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021160 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021161 }
21162 }
21163
Marat Dukhan801d2c22021-06-02 21:25:05 -070021164 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021165 TEST_REQUIRES_X86_AVX;
21166 for (size_t k = 1; k < 8; k++) {
21167 GemmMicrokernelTester()
21168 .mr(3)
21169 .nr(4)
21170 .kr(2)
21171 .sr(1)
21172 .m(3)
21173 .n(4)
21174 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021175 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021176 }
21177 }
21178
Marat Dukhan801d2c22021-06-02 21:25:05 -070021179 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021180 TEST_REQUIRES_X86_AVX;
21181 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021182 for (uint32_t n = 1; n <= 4; n++) {
21183 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021184 GemmMicrokernelTester()
21185 .mr(3)
21186 .nr(4)
21187 .kr(2)
21188 .sr(1)
21189 .m(m)
21190 .n(n)
21191 .k(k)
21192 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021193 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021194 }
21195 }
21196 }
21197 }
21198
Marat Dukhan801d2c22021-06-02 21:25:05 -070021199 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021200 TEST_REQUIRES_X86_AVX;
21201 for (size_t k = 9; k < 16; k++) {
21202 GemmMicrokernelTester()
21203 .mr(3)
21204 .nr(4)
21205 .kr(2)
21206 .sr(1)
21207 .m(3)
21208 .n(4)
21209 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021210 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021211 }
21212 }
21213
Marat Dukhan801d2c22021-06-02 21:25:05 -070021214 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021215 TEST_REQUIRES_X86_AVX;
21216 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021217 for (uint32_t n = 1; n <= 4; n++) {
21218 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021219 GemmMicrokernelTester()
21220 .mr(3)
21221 .nr(4)
21222 .kr(2)
21223 .sr(1)
21224 .m(m)
21225 .n(n)
21226 .k(k)
21227 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021228 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021229 }
21230 }
21231 }
21232 }
21233
Marat Dukhan801d2c22021-06-02 21:25:05 -070021234 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021235 TEST_REQUIRES_X86_AVX;
21236 for (size_t k = 16; k <= 80; k += 8) {
21237 GemmMicrokernelTester()
21238 .mr(3)
21239 .nr(4)
21240 .kr(2)
21241 .sr(1)
21242 .m(3)
21243 .n(4)
21244 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021245 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021246 }
21247 }
21248
Marat Dukhan801d2c22021-06-02 21:25:05 -070021249 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021250 TEST_REQUIRES_X86_AVX;
21251 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021252 for (uint32_t n = 1; n <= 4; n++) {
21253 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021254 GemmMicrokernelTester()
21255 .mr(3)
21256 .nr(4)
21257 .kr(2)
21258 .sr(1)
21259 .m(m)
21260 .n(n)
21261 .k(k)
21262 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021263 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021264 }
21265 }
21266 }
21267 }
21268
Marat Dukhan801d2c22021-06-02 21:25:05 -070021269 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021270 TEST_REQUIRES_X86_AVX;
21271 for (uint32_t n = 5; n < 8; n++) {
21272 for (size_t k = 1; k <= 40; k += 9) {
21273 GemmMicrokernelTester()
21274 .mr(3)
21275 .nr(4)
21276 .kr(2)
21277 .sr(1)
21278 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021279 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070021280 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021281 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021282 }
21283 }
21284 }
21285
Marat Dukhan801d2c22021-06-02 21:25:05 -070021286 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021287 TEST_REQUIRES_X86_AVX;
21288 for (uint32_t n = 5; n < 8; n++) {
21289 for (size_t k = 1; k <= 40; k += 9) {
21290 GemmMicrokernelTester()
21291 .mr(3)
21292 .nr(4)
21293 .kr(2)
21294 .sr(1)
21295 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021296 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070021297 .k(k)
21298 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021299 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021300 }
21301 }
21302 }
21303
Marat Dukhan801d2c22021-06-02 21:25:05 -070021304 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021305 TEST_REQUIRES_X86_AVX;
21306 for (uint32_t n = 5; n < 8; n++) {
21307 for (size_t k = 1; k <= 40; k += 9) {
21308 for (uint32_t m = 1; m <= 3; m++) {
21309 GemmMicrokernelTester()
21310 .mr(3)
21311 .nr(4)
21312 .kr(2)
21313 .sr(1)
21314 .m(m)
21315 .n(n)
21316 .k(k)
21317 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021318 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021319 }
21320 }
21321 }
21322 }
21323
Marat Dukhan801d2c22021-06-02 21:25:05 -070021324 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021325 TEST_REQUIRES_X86_AVX;
21326 for (uint32_t n = 8; n <= 12; n += 4) {
21327 for (size_t k = 1; k <= 40; k += 9) {
21328 GemmMicrokernelTester()
21329 .mr(3)
21330 .nr(4)
21331 .kr(2)
21332 .sr(1)
21333 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021334 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070021335 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021336 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021337 }
21338 }
21339 }
21340
Marat Dukhan801d2c22021-06-02 21:25:05 -070021341 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021342 TEST_REQUIRES_X86_AVX;
21343 for (uint32_t n = 8; n <= 12; n += 4) {
21344 for (size_t k = 1; k <= 40; k += 9) {
21345 GemmMicrokernelTester()
21346 .mr(3)
21347 .nr(4)
21348 .kr(2)
21349 .sr(1)
21350 .m(3)
21351 .n(n)
21352 .k(k)
21353 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021354 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021355 }
21356 }
21357 }
21358
Marat Dukhan801d2c22021-06-02 21:25:05 -070021359 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021360 TEST_REQUIRES_X86_AVX;
21361 for (uint32_t n = 8; n <= 12; n += 4) {
21362 for (size_t k = 1; k <= 40; k += 9) {
21363 for (uint32_t m = 1; m <= 3; m++) {
21364 GemmMicrokernelTester()
21365 .mr(3)
21366 .nr(4)
21367 .kr(2)
21368 .sr(1)
21369 .m(m)
21370 .n(n)
21371 .k(k)
21372 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021373 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021374 }
21375 }
21376 }
21377 }
21378
Marat Dukhan801d2c22021-06-02 21:25:05 -070021379 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, small_kernel) {
21380 TEST_REQUIRES_X86_AVX;
21381 for (size_t k = 1; k <= 40; k += 9) {
21382 GemmMicrokernelTester()
21383 .mr(3)
21384 .nr(4)
21385 .kr(2)
21386 .sr(1)
21387 .m(3)
21388 .n(4)
21389 .k(k)
21390 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080021391 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070021392 }
21393 }
21394
21395 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, small_kernel_subtile) {
21396 TEST_REQUIRES_X86_AVX;
21397 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021398 for (uint32_t n = 1; n <= 4; n++) {
21399 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070021400 GemmMicrokernelTester()
21401 .mr(3)
21402 .nr(4)
21403 .kr(2)
21404 .sr(1)
21405 .m(m)
21406 .n(n)
21407 .k(k)
21408 .ks(3)
21409 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021410 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070021411 }
21412 }
21413 }
21414 }
21415
21416 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_small_kernel) {
21417 TEST_REQUIRES_X86_AVX;
21418 for (uint32_t n = 5; n < 8; n++) {
21419 for (size_t k = 1; k <= 40; k += 9) {
21420 GemmMicrokernelTester()
21421 .mr(3)
21422 .nr(4)
21423 .kr(2)
21424 .sr(1)
21425 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021426 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070021427 .k(k)
21428 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080021429 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070021430 }
21431 }
21432 }
21433
21434 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_small_kernel) {
21435 TEST_REQUIRES_X86_AVX;
21436 for (uint32_t n = 8; n <= 12; n += 4) {
21437 for (size_t k = 1; k <= 40; k += 9) {
21438 GemmMicrokernelTester()
21439 .mr(3)
21440 .nr(4)
21441 .kr(2)
21442 .sr(1)
21443 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021444 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070021445 .k(k)
21446 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080021447 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070021448 }
21449 }
21450 }
21451
21452 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021453 TEST_REQUIRES_X86_AVX;
21454 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021455 for (uint32_t n = 1; n <= 4; n++) {
21456 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021457 GemmMicrokernelTester()
21458 .mr(3)
21459 .nr(4)
21460 .kr(2)
21461 .sr(1)
21462 .m(m)
21463 .n(n)
21464 .k(k)
21465 .cm_stride(7)
21466 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021467 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021468 }
21469 }
21470 }
21471 }
21472
Marat Dukhan801d2c22021-06-02 21:25:05 -070021473 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, a_offset) {
21474 TEST_REQUIRES_X86_AVX;
21475 for (size_t k = 1; k <= 40; k += 9) {
21476 GemmMicrokernelTester()
21477 .mr(3)
21478 .nr(4)
21479 .kr(2)
21480 .sr(1)
21481 .m(3)
21482 .n(4)
21483 .k(k)
21484 .ks(3)
21485 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080021486 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070021487 }
21488 }
21489
21490 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, zero) {
21491 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080021492 for (size_t k = 1; k <= 40; k += 9) {
21493 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070021494 GemmMicrokernelTester()
21495 .mr(3)
21496 .nr(4)
21497 .kr(2)
21498 .sr(1)
21499 .m(3)
21500 .n(4)
21501 .k(k)
21502 .ks(3)
21503 .a_offset(127)
21504 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080021505 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070021506 }
21507 }
21508 }
21509
21510 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021511 TEST_REQUIRES_X86_AVX;
21512 GemmMicrokernelTester()
21513 .mr(3)
21514 .nr(4)
21515 .kr(2)
21516 .sr(1)
21517 .m(3)
21518 .n(4)
21519 .k(8)
21520 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021521 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021522 }
21523
Marat Dukhan801d2c22021-06-02 21:25:05 -070021524 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021525 TEST_REQUIRES_X86_AVX;
21526 GemmMicrokernelTester()
21527 .mr(3)
21528 .nr(4)
21529 .kr(2)
21530 .sr(1)
21531 .m(3)
21532 .n(4)
21533 .k(8)
21534 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021535 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021536 }
21537
Marat Dukhan801d2c22021-06-02 21:25:05 -070021538 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021539 TEST_REQUIRES_X86_AVX;
21540 GemmMicrokernelTester()
21541 .mr(3)
21542 .nr(4)
21543 .kr(2)
21544 .sr(1)
21545 .m(3)
21546 .n(4)
21547 .k(8)
21548 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021549 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021550 }
21551#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21552
21553
21554#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070021555 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021556 TEST_REQUIRES_X86_XOP;
21557 GemmMicrokernelTester()
21558 .mr(1)
21559 .nr(4)
21560 .kr(2)
21561 .sr(1)
21562 .m(1)
21563 .n(4)
21564 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080021565 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021566 }
21567
Marat Dukhan801d2c22021-06-02 21:25:05 -070021568 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021569 TEST_REQUIRES_X86_XOP;
21570 GemmMicrokernelTester()
21571 .mr(1)
21572 .nr(4)
21573 .kr(2)
21574 .sr(1)
21575 .m(1)
21576 .n(4)
21577 .k(8)
21578 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021579 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021580 }
21581
Marat Dukhan801d2c22021-06-02 21:25:05 -070021582 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021583 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080021584 for (uint32_t n = 1; n <= 4; n++) {
21585 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021586 GemmMicrokernelTester()
21587 .mr(1)
21588 .nr(4)
21589 .kr(2)
21590 .sr(1)
21591 .m(m)
21592 .n(n)
21593 .k(8)
21594 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021595 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021596 }
21597 }
21598 }
21599
Marat Dukhan801d2c22021-06-02 21:25:05 -070021600 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021601 TEST_REQUIRES_X86_XOP;
21602 for (uint32_t m = 1; m <= 1; m++) {
21603 GemmMicrokernelTester()
21604 .mr(1)
21605 .nr(4)
21606 .kr(2)
21607 .sr(1)
21608 .m(m)
21609 .n(4)
21610 .k(8)
21611 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021612 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021613 }
21614 }
21615
Marat Dukhan801d2c22021-06-02 21:25:05 -070021616 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021617 TEST_REQUIRES_X86_XOP;
21618 for (uint32_t n = 1; n <= 4; n++) {
21619 GemmMicrokernelTester()
21620 .mr(1)
21621 .nr(4)
21622 .kr(2)
21623 .sr(1)
21624 .m(1)
21625 .n(n)
21626 .k(8)
21627 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021628 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021629 }
21630 }
21631
Marat Dukhan801d2c22021-06-02 21:25:05 -070021632 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021633 TEST_REQUIRES_X86_XOP;
21634 for (size_t k = 1; k < 8; k++) {
21635 GemmMicrokernelTester()
21636 .mr(1)
21637 .nr(4)
21638 .kr(2)
21639 .sr(1)
21640 .m(1)
21641 .n(4)
21642 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021643 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021644 }
21645 }
21646
Marat Dukhan801d2c22021-06-02 21:25:05 -070021647 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021648 TEST_REQUIRES_X86_XOP;
21649 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021650 for (uint32_t n = 1; n <= 4; n++) {
21651 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021652 GemmMicrokernelTester()
21653 .mr(1)
21654 .nr(4)
21655 .kr(2)
21656 .sr(1)
21657 .m(m)
21658 .n(n)
21659 .k(k)
21660 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021661 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021662 }
21663 }
21664 }
21665 }
21666
Marat Dukhan801d2c22021-06-02 21:25:05 -070021667 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021668 TEST_REQUIRES_X86_XOP;
21669 for (size_t k = 9; k < 16; k++) {
21670 GemmMicrokernelTester()
21671 .mr(1)
21672 .nr(4)
21673 .kr(2)
21674 .sr(1)
21675 .m(1)
21676 .n(4)
21677 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021678 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021679 }
21680 }
21681
Marat Dukhan801d2c22021-06-02 21:25:05 -070021682 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021683 TEST_REQUIRES_X86_XOP;
21684 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021685 for (uint32_t n = 1; n <= 4; n++) {
21686 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021687 GemmMicrokernelTester()
21688 .mr(1)
21689 .nr(4)
21690 .kr(2)
21691 .sr(1)
21692 .m(m)
21693 .n(n)
21694 .k(k)
21695 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021696 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021697 }
21698 }
21699 }
21700 }
21701
Marat Dukhan801d2c22021-06-02 21:25:05 -070021702 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021703 TEST_REQUIRES_X86_XOP;
21704 for (size_t k = 16; k <= 80; k += 8) {
21705 GemmMicrokernelTester()
21706 .mr(1)
21707 .nr(4)
21708 .kr(2)
21709 .sr(1)
21710 .m(1)
21711 .n(4)
21712 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021713 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021714 }
21715 }
21716
Marat Dukhan801d2c22021-06-02 21:25:05 -070021717 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021718 TEST_REQUIRES_X86_XOP;
21719 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021720 for (uint32_t n = 1; n <= 4; n++) {
21721 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021722 GemmMicrokernelTester()
21723 .mr(1)
21724 .nr(4)
21725 .kr(2)
21726 .sr(1)
21727 .m(m)
21728 .n(n)
21729 .k(k)
21730 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021731 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021732 }
21733 }
21734 }
21735 }
21736
Marat Dukhan801d2c22021-06-02 21:25:05 -070021737 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021738 TEST_REQUIRES_X86_XOP;
21739 for (uint32_t n = 5; n < 8; n++) {
21740 for (size_t k = 1; k <= 40; k += 9) {
21741 GemmMicrokernelTester()
21742 .mr(1)
21743 .nr(4)
21744 .kr(2)
21745 .sr(1)
21746 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021747 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070021748 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021749 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021750 }
21751 }
21752 }
21753
Marat Dukhan801d2c22021-06-02 21:25:05 -070021754 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021755 TEST_REQUIRES_X86_XOP;
21756 for (uint32_t n = 5; n < 8; n++) {
21757 for (size_t k = 1; k <= 40; k += 9) {
21758 GemmMicrokernelTester()
21759 .mr(1)
21760 .nr(4)
21761 .kr(2)
21762 .sr(1)
21763 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021764 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070021765 .k(k)
21766 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021767 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021768 }
21769 }
21770 }
21771
Marat Dukhan801d2c22021-06-02 21:25:05 -070021772 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021773 TEST_REQUIRES_X86_XOP;
21774 for (uint32_t n = 5; n < 8; n++) {
21775 for (size_t k = 1; k <= 40; k += 9) {
21776 for (uint32_t m = 1; m <= 1; m++) {
21777 GemmMicrokernelTester()
21778 .mr(1)
21779 .nr(4)
21780 .kr(2)
21781 .sr(1)
21782 .m(m)
21783 .n(n)
21784 .k(k)
21785 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021786 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021787 }
21788 }
21789 }
21790 }
21791
Marat Dukhan801d2c22021-06-02 21:25:05 -070021792 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021793 TEST_REQUIRES_X86_XOP;
21794 for (uint32_t n = 8; n <= 12; n += 4) {
21795 for (size_t k = 1; k <= 40; k += 9) {
21796 GemmMicrokernelTester()
21797 .mr(1)
21798 .nr(4)
21799 .kr(2)
21800 .sr(1)
21801 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021802 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070021803 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021804 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021805 }
21806 }
21807 }
21808
Marat Dukhan801d2c22021-06-02 21:25:05 -070021809 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021810 TEST_REQUIRES_X86_XOP;
21811 for (uint32_t n = 8; n <= 12; n += 4) {
21812 for (size_t k = 1; k <= 40; k += 9) {
21813 GemmMicrokernelTester()
21814 .mr(1)
21815 .nr(4)
21816 .kr(2)
21817 .sr(1)
21818 .m(1)
21819 .n(n)
21820 .k(k)
21821 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021822 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021823 }
21824 }
21825 }
21826
Marat Dukhan801d2c22021-06-02 21:25:05 -070021827 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021828 TEST_REQUIRES_X86_XOP;
21829 for (uint32_t n = 8; n <= 12; n += 4) {
21830 for (size_t k = 1; k <= 40; k += 9) {
21831 for (uint32_t m = 1; m <= 1; m++) {
21832 GemmMicrokernelTester()
21833 .mr(1)
21834 .nr(4)
21835 .kr(2)
21836 .sr(1)
21837 .m(m)
21838 .n(n)
21839 .k(k)
21840 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021841 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021842 }
21843 }
21844 }
21845 }
21846
Marat Dukhan801d2c22021-06-02 21:25:05 -070021847 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, small_kernel) {
21848 TEST_REQUIRES_X86_XOP;
21849 for (size_t k = 1; k <= 40; k += 9) {
21850 GemmMicrokernelTester()
21851 .mr(1)
21852 .nr(4)
21853 .kr(2)
21854 .sr(1)
21855 .m(1)
21856 .n(4)
21857 .k(k)
21858 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080021859 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070021860 }
21861 }
21862
21863 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, small_kernel_subtile) {
21864 TEST_REQUIRES_X86_XOP;
21865 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021866 for (uint32_t n = 1; n <= 4; n++) {
21867 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070021868 GemmMicrokernelTester()
21869 .mr(1)
21870 .nr(4)
21871 .kr(2)
21872 .sr(1)
21873 .m(m)
21874 .n(n)
21875 .k(k)
21876 .ks(3)
21877 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021878 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070021879 }
21880 }
21881 }
21882 }
21883
21884 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_small_kernel) {
21885 TEST_REQUIRES_X86_XOP;
21886 for (uint32_t n = 5; n < 8; n++) {
21887 for (size_t k = 1; k <= 40; k += 9) {
21888 GemmMicrokernelTester()
21889 .mr(1)
21890 .nr(4)
21891 .kr(2)
21892 .sr(1)
21893 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021894 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070021895 .k(k)
21896 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080021897 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070021898 }
21899 }
21900 }
21901
21902 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_small_kernel) {
21903 TEST_REQUIRES_X86_XOP;
21904 for (uint32_t n = 8; n <= 12; n += 4) {
21905 for (size_t k = 1; k <= 40; k += 9) {
21906 GemmMicrokernelTester()
21907 .mr(1)
21908 .nr(4)
21909 .kr(2)
21910 .sr(1)
21911 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021912 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070021913 .k(k)
21914 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080021915 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070021916 }
21917 }
21918 }
21919
21920 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021921 TEST_REQUIRES_X86_XOP;
21922 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021923 for (uint32_t n = 1; n <= 4; n++) {
21924 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021925 GemmMicrokernelTester()
21926 .mr(1)
21927 .nr(4)
21928 .kr(2)
21929 .sr(1)
21930 .m(m)
21931 .n(n)
21932 .k(k)
21933 .cm_stride(7)
21934 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021935 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021936 }
21937 }
21938 }
21939 }
21940
Marat Dukhan801d2c22021-06-02 21:25:05 -070021941 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, a_offset) {
21942 TEST_REQUIRES_X86_XOP;
21943 for (size_t k = 1; k <= 40; k += 9) {
21944 GemmMicrokernelTester()
21945 .mr(1)
21946 .nr(4)
21947 .kr(2)
21948 .sr(1)
21949 .m(1)
21950 .n(4)
21951 .k(k)
21952 .ks(3)
21953 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080021954 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070021955 }
21956 }
21957
21958 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, zero) {
21959 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080021960 for (size_t k = 1; k <= 40; k += 9) {
21961 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070021962 GemmMicrokernelTester()
21963 .mr(1)
21964 .nr(4)
21965 .kr(2)
21966 .sr(1)
21967 .m(1)
21968 .n(4)
21969 .k(k)
21970 .ks(3)
21971 .a_offset(43)
21972 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080021973 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070021974 }
21975 }
21976 }
21977
21978 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021979 TEST_REQUIRES_X86_XOP;
21980 GemmMicrokernelTester()
21981 .mr(1)
21982 .nr(4)
21983 .kr(2)
21984 .sr(1)
21985 .m(1)
21986 .n(4)
21987 .k(8)
21988 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021989 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021990 }
21991
Marat Dukhan801d2c22021-06-02 21:25:05 -070021992 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021993 TEST_REQUIRES_X86_XOP;
21994 GemmMicrokernelTester()
21995 .mr(1)
21996 .nr(4)
21997 .kr(2)
21998 .sr(1)
21999 .m(1)
22000 .n(4)
22001 .k(8)
22002 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022003 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022004 }
22005
Marat Dukhan801d2c22021-06-02 21:25:05 -070022006 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022007 TEST_REQUIRES_X86_XOP;
22008 GemmMicrokernelTester()
22009 .mr(1)
22010 .nr(4)
22011 .kr(2)
22012 .sr(1)
22013 .m(1)
22014 .n(4)
22015 .k(8)
22016 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022017 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022018 }
22019#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22020
22021
22022#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070022023 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022024 TEST_REQUIRES_X86_XOP;
22025 GemmMicrokernelTester()
22026 .mr(2)
22027 .nr(4)
22028 .kr(2)
22029 .sr(1)
22030 .m(2)
22031 .n(4)
22032 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080022033 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022034 }
22035
Marat Dukhan801d2c22021-06-02 21:25:05 -070022036 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022037 TEST_REQUIRES_X86_XOP;
22038 GemmMicrokernelTester()
22039 .mr(2)
22040 .nr(4)
22041 .kr(2)
22042 .sr(1)
22043 .m(2)
22044 .n(4)
22045 .k(8)
22046 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022047 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022048 }
22049
Marat Dukhan801d2c22021-06-02 21:25:05 -070022050 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022051 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080022052 for (uint32_t n = 1; n <= 4; n++) {
22053 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022054 GemmMicrokernelTester()
22055 .mr(2)
22056 .nr(4)
22057 .kr(2)
22058 .sr(1)
22059 .m(m)
22060 .n(n)
22061 .k(8)
22062 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022063 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022064 }
22065 }
22066 }
22067
Marat Dukhan801d2c22021-06-02 21:25:05 -070022068 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022069 TEST_REQUIRES_X86_XOP;
22070 for (uint32_t m = 1; m <= 2; m++) {
22071 GemmMicrokernelTester()
22072 .mr(2)
22073 .nr(4)
22074 .kr(2)
22075 .sr(1)
22076 .m(m)
22077 .n(4)
22078 .k(8)
22079 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022080 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022081 }
22082 }
22083
Marat Dukhan801d2c22021-06-02 21:25:05 -070022084 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022085 TEST_REQUIRES_X86_XOP;
22086 for (uint32_t n = 1; n <= 4; n++) {
22087 GemmMicrokernelTester()
22088 .mr(2)
22089 .nr(4)
22090 .kr(2)
22091 .sr(1)
22092 .m(2)
22093 .n(n)
22094 .k(8)
22095 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022096 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022097 }
22098 }
22099
Marat Dukhan801d2c22021-06-02 21:25:05 -070022100 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022101 TEST_REQUIRES_X86_XOP;
22102 for (size_t k = 1; k < 8; k++) {
22103 GemmMicrokernelTester()
22104 .mr(2)
22105 .nr(4)
22106 .kr(2)
22107 .sr(1)
22108 .m(2)
22109 .n(4)
22110 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022111 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022112 }
22113 }
22114
Marat Dukhan801d2c22021-06-02 21:25:05 -070022115 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022116 TEST_REQUIRES_X86_XOP;
22117 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022118 for (uint32_t n = 1; n <= 4; n++) {
22119 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022120 GemmMicrokernelTester()
22121 .mr(2)
22122 .nr(4)
22123 .kr(2)
22124 .sr(1)
22125 .m(m)
22126 .n(n)
22127 .k(k)
22128 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022129 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022130 }
22131 }
22132 }
22133 }
22134
Marat Dukhan801d2c22021-06-02 21:25:05 -070022135 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022136 TEST_REQUIRES_X86_XOP;
22137 for (size_t k = 9; k < 16; k++) {
22138 GemmMicrokernelTester()
22139 .mr(2)
22140 .nr(4)
22141 .kr(2)
22142 .sr(1)
22143 .m(2)
22144 .n(4)
22145 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022146 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022147 }
22148 }
22149
Marat Dukhan801d2c22021-06-02 21:25:05 -070022150 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022151 TEST_REQUIRES_X86_XOP;
22152 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022153 for (uint32_t n = 1; n <= 4; n++) {
22154 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022155 GemmMicrokernelTester()
22156 .mr(2)
22157 .nr(4)
22158 .kr(2)
22159 .sr(1)
22160 .m(m)
22161 .n(n)
22162 .k(k)
22163 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022164 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022165 }
22166 }
22167 }
22168 }
22169
Marat Dukhan801d2c22021-06-02 21:25:05 -070022170 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022171 TEST_REQUIRES_X86_XOP;
22172 for (size_t k = 16; k <= 80; k += 8) {
22173 GemmMicrokernelTester()
22174 .mr(2)
22175 .nr(4)
22176 .kr(2)
22177 .sr(1)
22178 .m(2)
22179 .n(4)
22180 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022181 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022182 }
22183 }
22184
Marat Dukhan801d2c22021-06-02 21:25:05 -070022185 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022186 TEST_REQUIRES_X86_XOP;
22187 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022188 for (uint32_t n = 1; n <= 4; n++) {
22189 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022190 GemmMicrokernelTester()
22191 .mr(2)
22192 .nr(4)
22193 .kr(2)
22194 .sr(1)
22195 .m(m)
22196 .n(n)
22197 .k(k)
22198 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022199 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022200 }
22201 }
22202 }
22203 }
22204
Marat Dukhan801d2c22021-06-02 21:25:05 -070022205 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022206 TEST_REQUIRES_X86_XOP;
22207 for (uint32_t n = 5; n < 8; n++) {
22208 for (size_t k = 1; k <= 40; k += 9) {
22209 GemmMicrokernelTester()
22210 .mr(2)
22211 .nr(4)
22212 .kr(2)
22213 .sr(1)
22214 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022215 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070022216 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022217 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022218 }
22219 }
22220 }
22221
Marat Dukhan801d2c22021-06-02 21:25:05 -070022222 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022223 TEST_REQUIRES_X86_XOP;
22224 for (uint32_t n = 5; n < 8; n++) {
22225 for (size_t k = 1; k <= 40; k += 9) {
22226 GemmMicrokernelTester()
22227 .mr(2)
22228 .nr(4)
22229 .kr(2)
22230 .sr(1)
22231 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022232 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070022233 .k(k)
22234 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022235 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022236 }
22237 }
22238 }
22239
Marat Dukhan801d2c22021-06-02 21:25:05 -070022240 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022241 TEST_REQUIRES_X86_XOP;
22242 for (uint32_t n = 5; n < 8; n++) {
22243 for (size_t k = 1; k <= 40; k += 9) {
22244 for (uint32_t m = 1; m <= 2; m++) {
22245 GemmMicrokernelTester()
22246 .mr(2)
22247 .nr(4)
22248 .kr(2)
22249 .sr(1)
22250 .m(m)
22251 .n(n)
22252 .k(k)
22253 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022254 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022255 }
22256 }
22257 }
22258 }
22259
Marat Dukhan801d2c22021-06-02 21:25:05 -070022260 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022261 TEST_REQUIRES_X86_XOP;
22262 for (uint32_t n = 8; n <= 12; n += 4) {
22263 for (size_t k = 1; k <= 40; k += 9) {
22264 GemmMicrokernelTester()
22265 .mr(2)
22266 .nr(4)
22267 .kr(2)
22268 .sr(1)
22269 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022270 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070022271 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022272 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022273 }
22274 }
22275 }
22276
Marat Dukhan801d2c22021-06-02 21:25:05 -070022277 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022278 TEST_REQUIRES_X86_XOP;
22279 for (uint32_t n = 8; n <= 12; n += 4) {
22280 for (size_t k = 1; k <= 40; k += 9) {
22281 GemmMicrokernelTester()
22282 .mr(2)
22283 .nr(4)
22284 .kr(2)
22285 .sr(1)
22286 .m(2)
22287 .n(n)
22288 .k(k)
22289 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022290 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022291 }
22292 }
22293 }
22294
Marat Dukhan801d2c22021-06-02 21:25:05 -070022295 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022296 TEST_REQUIRES_X86_XOP;
22297 for (uint32_t n = 8; n <= 12; n += 4) {
22298 for (size_t k = 1; k <= 40; k += 9) {
22299 for (uint32_t m = 1; m <= 2; m++) {
22300 GemmMicrokernelTester()
22301 .mr(2)
22302 .nr(4)
22303 .kr(2)
22304 .sr(1)
22305 .m(m)
22306 .n(n)
22307 .k(k)
22308 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022309 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022310 }
22311 }
22312 }
22313 }
22314
Marat Dukhan801d2c22021-06-02 21:25:05 -070022315 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, small_kernel) {
22316 TEST_REQUIRES_X86_XOP;
22317 for (size_t k = 1; k <= 40; k += 9) {
22318 GemmMicrokernelTester()
22319 .mr(2)
22320 .nr(4)
22321 .kr(2)
22322 .sr(1)
22323 .m(2)
22324 .n(4)
22325 .k(k)
22326 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080022327 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070022328 }
22329 }
22330
22331 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, small_kernel_subtile) {
22332 TEST_REQUIRES_X86_XOP;
22333 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022334 for (uint32_t n = 1; n <= 4; n++) {
22335 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070022336 GemmMicrokernelTester()
22337 .mr(2)
22338 .nr(4)
22339 .kr(2)
22340 .sr(1)
22341 .m(m)
22342 .n(n)
22343 .k(k)
22344 .ks(3)
22345 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022346 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070022347 }
22348 }
22349 }
22350 }
22351
22352 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_small_kernel) {
22353 TEST_REQUIRES_X86_XOP;
22354 for (uint32_t n = 5; n < 8; n++) {
22355 for (size_t k = 1; k <= 40; k += 9) {
22356 GemmMicrokernelTester()
22357 .mr(2)
22358 .nr(4)
22359 .kr(2)
22360 .sr(1)
22361 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022362 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070022363 .k(k)
22364 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080022365 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070022366 }
22367 }
22368 }
22369
22370 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_small_kernel) {
22371 TEST_REQUIRES_X86_XOP;
22372 for (uint32_t n = 8; n <= 12; n += 4) {
22373 for (size_t k = 1; k <= 40; k += 9) {
22374 GemmMicrokernelTester()
22375 .mr(2)
22376 .nr(4)
22377 .kr(2)
22378 .sr(1)
22379 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022380 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070022381 .k(k)
22382 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080022383 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070022384 }
22385 }
22386 }
22387
22388 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022389 TEST_REQUIRES_X86_XOP;
22390 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022391 for (uint32_t n = 1; n <= 4; n++) {
22392 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022393 GemmMicrokernelTester()
22394 .mr(2)
22395 .nr(4)
22396 .kr(2)
22397 .sr(1)
22398 .m(m)
22399 .n(n)
22400 .k(k)
22401 .cm_stride(7)
22402 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022403 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022404 }
22405 }
22406 }
22407 }
22408
Marat Dukhan801d2c22021-06-02 21:25:05 -070022409 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, a_offset) {
22410 TEST_REQUIRES_X86_XOP;
22411 for (size_t k = 1; k <= 40; k += 9) {
22412 GemmMicrokernelTester()
22413 .mr(2)
22414 .nr(4)
22415 .kr(2)
22416 .sr(1)
22417 .m(2)
22418 .n(4)
22419 .k(k)
22420 .ks(3)
22421 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080022422 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070022423 }
22424 }
22425
22426 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, zero) {
22427 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080022428 for (size_t k = 1; k <= 40; k += 9) {
22429 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070022430 GemmMicrokernelTester()
22431 .mr(2)
22432 .nr(4)
22433 .kr(2)
22434 .sr(1)
22435 .m(2)
22436 .n(4)
22437 .k(k)
22438 .ks(3)
22439 .a_offset(83)
22440 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080022441 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070022442 }
22443 }
22444 }
22445
22446 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022447 TEST_REQUIRES_X86_XOP;
22448 GemmMicrokernelTester()
22449 .mr(2)
22450 .nr(4)
22451 .kr(2)
22452 .sr(1)
22453 .m(2)
22454 .n(4)
22455 .k(8)
22456 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022457 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022458 }
22459
Marat Dukhan801d2c22021-06-02 21:25:05 -070022460 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022461 TEST_REQUIRES_X86_XOP;
22462 GemmMicrokernelTester()
22463 .mr(2)
22464 .nr(4)
22465 .kr(2)
22466 .sr(1)
22467 .m(2)
22468 .n(4)
22469 .k(8)
22470 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022471 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022472 }
22473
Marat Dukhan801d2c22021-06-02 21:25:05 -070022474 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022475 TEST_REQUIRES_X86_XOP;
22476 GemmMicrokernelTester()
22477 .mr(2)
22478 .nr(4)
22479 .kr(2)
22480 .sr(1)
22481 .m(2)
22482 .n(4)
22483 .k(8)
22484 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022485 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022486 }
22487#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22488
22489
22490#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070022491 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022492 TEST_REQUIRES_X86_XOP;
22493 GemmMicrokernelTester()
22494 .mr(4)
22495 .nr(4)
22496 .kr(2)
22497 .sr(1)
22498 .m(4)
22499 .n(4)
22500 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080022501 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022502 }
22503
Marat Dukhan801d2c22021-06-02 21:25:05 -070022504 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022505 TEST_REQUIRES_X86_XOP;
22506 GemmMicrokernelTester()
22507 .mr(4)
22508 .nr(4)
22509 .kr(2)
22510 .sr(1)
22511 .m(4)
22512 .n(4)
22513 .k(8)
22514 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022515 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022516 }
22517
Marat Dukhan801d2c22021-06-02 21:25:05 -070022518 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022519 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080022520 for (uint32_t n = 1; n <= 4; n++) {
22521 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022522 GemmMicrokernelTester()
22523 .mr(4)
22524 .nr(4)
22525 .kr(2)
22526 .sr(1)
22527 .m(m)
22528 .n(n)
22529 .k(8)
22530 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022531 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022532 }
22533 }
22534 }
22535
Marat Dukhan801d2c22021-06-02 21:25:05 -070022536 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022537 TEST_REQUIRES_X86_XOP;
22538 for (uint32_t m = 1; m <= 4; m++) {
22539 GemmMicrokernelTester()
22540 .mr(4)
22541 .nr(4)
22542 .kr(2)
22543 .sr(1)
22544 .m(m)
22545 .n(4)
22546 .k(8)
22547 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022548 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022549 }
22550 }
22551
Marat Dukhan801d2c22021-06-02 21:25:05 -070022552 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022553 TEST_REQUIRES_X86_XOP;
22554 for (uint32_t n = 1; n <= 4; n++) {
22555 GemmMicrokernelTester()
22556 .mr(4)
22557 .nr(4)
22558 .kr(2)
22559 .sr(1)
22560 .m(4)
22561 .n(n)
22562 .k(8)
22563 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022564 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022565 }
22566 }
22567
Marat Dukhan801d2c22021-06-02 21:25:05 -070022568 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022569 TEST_REQUIRES_X86_XOP;
22570 for (size_t k = 1; k < 8; k++) {
22571 GemmMicrokernelTester()
22572 .mr(4)
22573 .nr(4)
22574 .kr(2)
22575 .sr(1)
22576 .m(4)
22577 .n(4)
22578 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022579 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022580 }
22581 }
22582
Marat Dukhan801d2c22021-06-02 21:25:05 -070022583 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022584 TEST_REQUIRES_X86_XOP;
22585 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022586 for (uint32_t n = 1; n <= 4; n++) {
22587 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022588 GemmMicrokernelTester()
22589 .mr(4)
22590 .nr(4)
22591 .kr(2)
22592 .sr(1)
22593 .m(m)
22594 .n(n)
22595 .k(k)
22596 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022597 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022598 }
22599 }
22600 }
22601 }
22602
Marat Dukhan801d2c22021-06-02 21:25:05 -070022603 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022604 TEST_REQUIRES_X86_XOP;
22605 for (size_t k = 9; k < 16; k++) {
22606 GemmMicrokernelTester()
22607 .mr(4)
22608 .nr(4)
22609 .kr(2)
22610 .sr(1)
22611 .m(4)
22612 .n(4)
22613 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022614 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022615 }
22616 }
22617
Marat Dukhan801d2c22021-06-02 21:25:05 -070022618 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022619 TEST_REQUIRES_X86_XOP;
22620 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022621 for (uint32_t n = 1; n <= 4; n++) {
22622 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022623 GemmMicrokernelTester()
22624 .mr(4)
22625 .nr(4)
22626 .kr(2)
22627 .sr(1)
22628 .m(m)
22629 .n(n)
22630 .k(k)
22631 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022632 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022633 }
22634 }
22635 }
22636 }
22637
Marat Dukhan801d2c22021-06-02 21:25:05 -070022638 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022639 TEST_REQUIRES_X86_XOP;
22640 for (size_t k = 16; k <= 80; k += 8) {
22641 GemmMicrokernelTester()
22642 .mr(4)
22643 .nr(4)
22644 .kr(2)
22645 .sr(1)
22646 .m(4)
22647 .n(4)
22648 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022649 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022650 }
22651 }
22652
Marat Dukhan801d2c22021-06-02 21:25:05 -070022653 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022654 TEST_REQUIRES_X86_XOP;
22655 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022656 for (uint32_t n = 1; n <= 4; n++) {
22657 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022658 GemmMicrokernelTester()
22659 .mr(4)
22660 .nr(4)
22661 .kr(2)
22662 .sr(1)
22663 .m(m)
22664 .n(n)
22665 .k(k)
22666 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022667 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022668 }
22669 }
22670 }
22671 }
22672
Marat Dukhan801d2c22021-06-02 21:25:05 -070022673 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022674 TEST_REQUIRES_X86_XOP;
22675 for (uint32_t n = 5; n < 8; n++) {
22676 for (size_t k = 1; k <= 40; k += 9) {
22677 GemmMicrokernelTester()
22678 .mr(4)
22679 .nr(4)
22680 .kr(2)
22681 .sr(1)
22682 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022683 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070022684 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022685 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022686 }
22687 }
22688 }
22689
Marat Dukhan801d2c22021-06-02 21:25:05 -070022690 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022691 TEST_REQUIRES_X86_XOP;
22692 for (uint32_t n = 5; n < 8; n++) {
22693 for (size_t k = 1; k <= 40; k += 9) {
22694 GemmMicrokernelTester()
22695 .mr(4)
22696 .nr(4)
22697 .kr(2)
22698 .sr(1)
22699 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022700 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070022701 .k(k)
22702 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022703 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022704 }
22705 }
22706 }
22707
Marat Dukhan801d2c22021-06-02 21:25:05 -070022708 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022709 TEST_REQUIRES_X86_XOP;
22710 for (uint32_t n = 5; n < 8; n++) {
22711 for (size_t k = 1; k <= 40; k += 9) {
22712 for (uint32_t m = 1; m <= 4; m++) {
22713 GemmMicrokernelTester()
22714 .mr(4)
22715 .nr(4)
22716 .kr(2)
22717 .sr(1)
22718 .m(m)
22719 .n(n)
22720 .k(k)
22721 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022722 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022723 }
22724 }
22725 }
22726 }
22727
Marat Dukhan801d2c22021-06-02 21:25:05 -070022728 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022729 TEST_REQUIRES_X86_XOP;
22730 for (uint32_t n = 8; n <= 12; n += 4) {
22731 for (size_t k = 1; k <= 40; k += 9) {
22732 GemmMicrokernelTester()
22733 .mr(4)
22734 .nr(4)
22735 .kr(2)
22736 .sr(1)
22737 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022738 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070022739 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022740 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022741 }
22742 }
22743 }
22744
Marat Dukhan801d2c22021-06-02 21:25:05 -070022745 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022746 TEST_REQUIRES_X86_XOP;
22747 for (uint32_t n = 8; n <= 12; n += 4) {
22748 for (size_t k = 1; k <= 40; k += 9) {
22749 GemmMicrokernelTester()
22750 .mr(4)
22751 .nr(4)
22752 .kr(2)
22753 .sr(1)
22754 .m(4)
22755 .n(n)
22756 .k(k)
22757 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022758 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022759 }
22760 }
22761 }
22762
Marat Dukhan801d2c22021-06-02 21:25:05 -070022763 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022764 TEST_REQUIRES_X86_XOP;
22765 for (uint32_t n = 8; n <= 12; n += 4) {
22766 for (size_t k = 1; k <= 40; k += 9) {
22767 for (uint32_t m = 1; m <= 4; m++) {
22768 GemmMicrokernelTester()
22769 .mr(4)
22770 .nr(4)
22771 .kr(2)
22772 .sr(1)
22773 .m(m)
22774 .n(n)
22775 .k(k)
22776 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022777 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022778 }
22779 }
22780 }
22781 }
22782
Marat Dukhan801d2c22021-06-02 21:25:05 -070022783 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, small_kernel) {
22784 TEST_REQUIRES_X86_XOP;
22785 for (size_t k = 1; k <= 40; k += 9) {
22786 GemmMicrokernelTester()
22787 .mr(4)
22788 .nr(4)
22789 .kr(2)
22790 .sr(1)
22791 .m(4)
22792 .n(4)
22793 .k(k)
22794 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080022795 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070022796 }
22797 }
22798
22799 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, small_kernel_subtile) {
22800 TEST_REQUIRES_X86_XOP;
22801 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022802 for (uint32_t n = 1; n <= 4; n++) {
22803 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070022804 GemmMicrokernelTester()
22805 .mr(4)
22806 .nr(4)
22807 .kr(2)
22808 .sr(1)
22809 .m(m)
22810 .n(n)
22811 .k(k)
22812 .ks(3)
22813 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022814 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070022815 }
22816 }
22817 }
22818 }
22819
22820 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_small_kernel) {
22821 TEST_REQUIRES_X86_XOP;
22822 for (uint32_t n = 5; n < 8; n++) {
22823 for (size_t k = 1; k <= 40; k += 9) {
22824 GemmMicrokernelTester()
22825 .mr(4)
22826 .nr(4)
22827 .kr(2)
22828 .sr(1)
22829 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022830 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070022831 .k(k)
22832 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080022833 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070022834 }
22835 }
22836 }
22837
22838 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_small_kernel) {
22839 TEST_REQUIRES_X86_XOP;
22840 for (uint32_t n = 8; n <= 12; n += 4) {
22841 for (size_t k = 1; k <= 40; k += 9) {
22842 GemmMicrokernelTester()
22843 .mr(4)
22844 .nr(4)
22845 .kr(2)
22846 .sr(1)
22847 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022848 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070022849 .k(k)
22850 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080022851 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070022852 }
22853 }
22854 }
22855
22856 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022857 TEST_REQUIRES_X86_XOP;
22858 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022859 for (uint32_t n = 1; n <= 4; n++) {
22860 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022861 GemmMicrokernelTester()
22862 .mr(4)
22863 .nr(4)
22864 .kr(2)
22865 .sr(1)
22866 .m(m)
22867 .n(n)
22868 .k(k)
22869 .cm_stride(7)
22870 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022871 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022872 }
22873 }
22874 }
22875 }
22876
Marat Dukhan801d2c22021-06-02 21:25:05 -070022877 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, a_offset) {
22878 TEST_REQUIRES_X86_XOP;
22879 for (size_t k = 1; k <= 40; k += 9) {
22880 GemmMicrokernelTester()
22881 .mr(4)
22882 .nr(4)
22883 .kr(2)
22884 .sr(1)
22885 .m(4)
22886 .n(4)
22887 .k(k)
22888 .ks(3)
22889 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080022890 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070022891 }
22892 }
22893
22894 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, zero) {
22895 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080022896 for (size_t k = 1; k <= 40; k += 9) {
22897 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070022898 GemmMicrokernelTester()
22899 .mr(4)
22900 .nr(4)
22901 .kr(2)
22902 .sr(1)
22903 .m(4)
22904 .n(4)
22905 .k(k)
22906 .ks(3)
22907 .a_offset(163)
22908 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080022909 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070022910 }
22911 }
22912 }
22913
22914 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022915 TEST_REQUIRES_X86_XOP;
22916 GemmMicrokernelTester()
22917 .mr(4)
22918 .nr(4)
22919 .kr(2)
22920 .sr(1)
22921 .m(4)
22922 .n(4)
22923 .k(8)
22924 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022925 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022926 }
22927
Marat Dukhan801d2c22021-06-02 21:25:05 -070022928 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022929 TEST_REQUIRES_X86_XOP;
22930 GemmMicrokernelTester()
22931 .mr(4)
22932 .nr(4)
22933 .kr(2)
22934 .sr(1)
22935 .m(4)
22936 .n(4)
22937 .k(8)
22938 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022939 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022940 }
22941
Marat Dukhan801d2c22021-06-02 21:25:05 -070022942 TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022943 TEST_REQUIRES_X86_XOP;
22944 GemmMicrokernelTester()
22945 .mr(4)
22946 .nr(4)
22947 .kr(2)
22948 .sr(1)
22949 .m(4)
22950 .n(4)
22951 .k(8)
22952 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022953 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022954 }
22955#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22956
22957
22958#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070022959 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022960 TEST_REQUIRES_X86_SSE2;
22961 GemmMicrokernelTester()
22962 .mr(1)
22963 .nr(4)
22964 .kr(8)
22965 .sr(1)
22966 .m(1)
22967 .n(4)
22968 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080022969 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022970 }
22971
Marat Dukhan801d2c22021-06-02 21:25:05 -070022972 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022973 TEST_REQUIRES_X86_SSE2;
22974 GemmMicrokernelTester()
22975 .mr(1)
22976 .nr(4)
22977 .kr(8)
22978 .sr(1)
22979 .m(1)
22980 .n(4)
22981 .k(8)
22982 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022983 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022984 }
22985
Marat Dukhan801d2c22021-06-02 21:25:05 -070022986 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022987 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080022988 for (uint32_t n = 1; n <= 4; n++) {
22989 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022990 GemmMicrokernelTester()
22991 .mr(1)
22992 .nr(4)
22993 .kr(8)
22994 .sr(1)
22995 .m(m)
22996 .n(n)
22997 .k(8)
22998 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022999 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023000 }
23001 }
23002 }
23003
Marat Dukhan801d2c22021-06-02 21:25:05 -070023004 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023005 TEST_REQUIRES_X86_SSE2;
23006 for (uint32_t m = 1; m <= 1; m++) {
23007 GemmMicrokernelTester()
23008 .mr(1)
23009 .nr(4)
23010 .kr(8)
23011 .sr(1)
23012 .m(m)
23013 .n(4)
23014 .k(8)
23015 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023016 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023017 }
23018 }
23019
Marat Dukhan801d2c22021-06-02 21:25:05 -070023020 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023021 TEST_REQUIRES_X86_SSE2;
23022 for (uint32_t n = 1; n <= 4; n++) {
23023 GemmMicrokernelTester()
23024 .mr(1)
23025 .nr(4)
23026 .kr(8)
23027 .sr(1)
23028 .m(1)
23029 .n(n)
23030 .k(8)
23031 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023032 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023033 }
23034 }
23035
Marat Dukhan801d2c22021-06-02 21:25:05 -070023036 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023037 TEST_REQUIRES_X86_SSE2;
23038 for (size_t k = 1; k < 8; k++) {
23039 GemmMicrokernelTester()
23040 .mr(1)
23041 .nr(4)
23042 .kr(8)
23043 .sr(1)
23044 .m(1)
23045 .n(4)
23046 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023047 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023048 }
23049 }
23050
Marat Dukhan801d2c22021-06-02 21:25:05 -070023051 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023052 TEST_REQUIRES_X86_SSE2;
23053 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023054 for (uint32_t n = 1; n <= 4; n++) {
23055 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023056 GemmMicrokernelTester()
23057 .mr(1)
23058 .nr(4)
23059 .kr(8)
23060 .sr(1)
23061 .m(m)
23062 .n(n)
23063 .k(k)
23064 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023065 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023066 }
23067 }
23068 }
23069 }
23070
Marat Dukhan801d2c22021-06-02 21:25:05 -070023071 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023072 TEST_REQUIRES_X86_SSE2;
23073 for (size_t k = 9; k < 16; k++) {
23074 GemmMicrokernelTester()
23075 .mr(1)
23076 .nr(4)
23077 .kr(8)
23078 .sr(1)
23079 .m(1)
23080 .n(4)
23081 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023082 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023083 }
23084 }
23085
Marat Dukhan801d2c22021-06-02 21:25:05 -070023086 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023087 TEST_REQUIRES_X86_SSE2;
23088 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023089 for (uint32_t n = 1; n <= 4; n++) {
23090 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023091 GemmMicrokernelTester()
23092 .mr(1)
23093 .nr(4)
23094 .kr(8)
23095 .sr(1)
23096 .m(m)
23097 .n(n)
23098 .k(k)
23099 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023100 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023101 }
23102 }
23103 }
23104 }
23105
Marat Dukhan801d2c22021-06-02 21:25:05 -070023106 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023107 TEST_REQUIRES_X86_SSE2;
23108 for (size_t k = 16; k <= 80; k += 8) {
23109 GemmMicrokernelTester()
23110 .mr(1)
23111 .nr(4)
23112 .kr(8)
23113 .sr(1)
23114 .m(1)
23115 .n(4)
23116 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023117 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023118 }
23119 }
23120
Marat Dukhan801d2c22021-06-02 21:25:05 -070023121 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023122 TEST_REQUIRES_X86_SSE2;
23123 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023124 for (uint32_t n = 1; n <= 4; n++) {
23125 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023126 GemmMicrokernelTester()
23127 .mr(1)
23128 .nr(4)
23129 .kr(8)
23130 .sr(1)
23131 .m(m)
23132 .n(n)
23133 .k(k)
23134 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023135 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023136 }
23137 }
23138 }
23139 }
23140
Marat Dukhan801d2c22021-06-02 21:25:05 -070023141 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023142 TEST_REQUIRES_X86_SSE2;
23143 for (uint32_t n = 5; n < 8; n++) {
23144 for (size_t k = 1; k <= 40; k += 9) {
23145 GemmMicrokernelTester()
23146 .mr(1)
23147 .nr(4)
23148 .kr(8)
23149 .sr(1)
23150 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023151 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070023152 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023153 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023154 }
23155 }
23156 }
23157
Marat Dukhan801d2c22021-06-02 21:25:05 -070023158 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023159 TEST_REQUIRES_X86_SSE2;
23160 for (uint32_t n = 5; n < 8; n++) {
23161 for (size_t k = 1; k <= 40; k += 9) {
23162 GemmMicrokernelTester()
23163 .mr(1)
23164 .nr(4)
23165 .kr(8)
23166 .sr(1)
23167 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023168 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070023169 .k(k)
23170 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080023171 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023172 }
23173 }
23174 }
23175
Marat Dukhan801d2c22021-06-02 21:25:05 -070023176 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023177 TEST_REQUIRES_X86_SSE2;
23178 for (uint32_t n = 5; n < 8; n++) {
23179 for (size_t k = 1; k <= 40; k += 9) {
23180 for (uint32_t m = 1; m <= 1; m++) {
23181 GemmMicrokernelTester()
23182 .mr(1)
23183 .nr(4)
23184 .kr(8)
23185 .sr(1)
23186 .m(m)
23187 .n(n)
23188 .k(k)
23189 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023190 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023191 }
23192 }
23193 }
23194 }
23195
Marat Dukhan801d2c22021-06-02 21:25:05 -070023196 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023197 TEST_REQUIRES_X86_SSE2;
23198 for (uint32_t n = 8; n <= 12; n += 4) {
23199 for (size_t k = 1; k <= 40; k += 9) {
23200 GemmMicrokernelTester()
23201 .mr(1)
23202 .nr(4)
23203 .kr(8)
23204 .sr(1)
23205 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023206 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070023207 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023208 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023209 }
23210 }
23211 }
23212
Marat Dukhan801d2c22021-06-02 21:25:05 -070023213 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023214 TEST_REQUIRES_X86_SSE2;
23215 for (uint32_t n = 8; n <= 12; n += 4) {
23216 for (size_t k = 1; k <= 40; k += 9) {
23217 GemmMicrokernelTester()
23218 .mr(1)
23219 .nr(4)
23220 .kr(8)
23221 .sr(1)
23222 .m(1)
23223 .n(n)
23224 .k(k)
23225 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080023226 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023227 }
23228 }
23229 }
23230
Marat Dukhan801d2c22021-06-02 21:25:05 -070023231 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023232 TEST_REQUIRES_X86_SSE2;
23233 for (uint32_t n = 8; n <= 12; n += 4) {
23234 for (size_t k = 1; k <= 40; k += 9) {
23235 for (uint32_t m = 1; m <= 1; m++) {
23236 GemmMicrokernelTester()
23237 .mr(1)
23238 .nr(4)
23239 .kr(8)
23240 .sr(1)
23241 .m(m)
23242 .n(n)
23243 .k(k)
23244 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023245 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023246 }
23247 }
23248 }
23249 }
23250
Marat Dukhan801d2c22021-06-02 21:25:05 -070023251 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, small_kernel) {
23252 TEST_REQUIRES_X86_SSE2;
23253 for (size_t k = 1; k <= 40; k += 9) {
23254 GemmMicrokernelTester()
23255 .mr(1)
23256 .nr(4)
23257 .kr(8)
23258 .sr(1)
23259 .m(1)
23260 .n(4)
23261 .k(k)
23262 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080023263 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070023264 }
23265 }
23266
23267 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, small_kernel_subtile) {
23268 TEST_REQUIRES_X86_SSE2;
23269 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023270 for (uint32_t n = 1; n <= 4; n++) {
23271 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070023272 GemmMicrokernelTester()
23273 .mr(1)
23274 .nr(4)
23275 .kr(8)
23276 .sr(1)
23277 .m(m)
23278 .n(n)
23279 .k(k)
23280 .ks(3)
23281 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023282 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070023283 }
23284 }
23285 }
23286 }
23287
23288 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_small_kernel) {
23289 TEST_REQUIRES_X86_SSE2;
23290 for (uint32_t n = 5; n < 8; n++) {
23291 for (size_t k = 1; k <= 40; k += 9) {
23292 GemmMicrokernelTester()
23293 .mr(1)
23294 .nr(4)
23295 .kr(8)
23296 .sr(1)
23297 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023298 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070023299 .k(k)
23300 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080023301 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070023302 }
23303 }
23304 }
23305
23306 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_small_kernel) {
23307 TEST_REQUIRES_X86_SSE2;
23308 for (uint32_t n = 8; n <= 12; n += 4) {
23309 for (size_t k = 1; k <= 40; k += 9) {
23310 GemmMicrokernelTester()
23311 .mr(1)
23312 .nr(4)
23313 .kr(8)
23314 .sr(1)
23315 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023316 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070023317 .k(k)
23318 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080023319 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070023320 }
23321 }
23322 }
23323
23324 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023325 TEST_REQUIRES_X86_SSE2;
23326 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023327 for (uint32_t n = 1; n <= 4; n++) {
23328 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023329 GemmMicrokernelTester()
23330 .mr(1)
23331 .nr(4)
23332 .kr(8)
23333 .sr(1)
23334 .m(m)
23335 .n(n)
23336 .k(k)
23337 .cm_stride(7)
23338 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023339 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023340 }
23341 }
23342 }
23343 }
23344
Marat Dukhan801d2c22021-06-02 21:25:05 -070023345 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, a_offset) {
23346 TEST_REQUIRES_X86_SSE2;
23347 for (size_t k = 1; k <= 40; k += 9) {
23348 GemmMicrokernelTester()
23349 .mr(1)
23350 .nr(4)
23351 .kr(8)
23352 .sr(1)
23353 .m(1)
23354 .n(4)
23355 .k(k)
23356 .ks(3)
23357 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080023358 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070023359 }
23360 }
23361
23362 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, zero) {
23363 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080023364 for (size_t k = 1; k <= 40; k += 9) {
23365 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070023366 GemmMicrokernelTester()
23367 .mr(1)
23368 .nr(4)
23369 .kr(8)
23370 .sr(1)
23371 .m(1)
23372 .n(4)
23373 .k(k)
23374 .ks(3)
23375 .a_offset(43)
23376 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080023377 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070023378 }
23379 }
23380 }
23381
23382 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023383 TEST_REQUIRES_X86_SSE2;
23384 GemmMicrokernelTester()
23385 .mr(1)
23386 .nr(4)
23387 .kr(8)
23388 .sr(1)
23389 .m(1)
23390 .n(4)
23391 .k(8)
23392 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023393 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023394 }
23395
Marat Dukhan801d2c22021-06-02 21:25:05 -070023396 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023397 TEST_REQUIRES_X86_SSE2;
23398 GemmMicrokernelTester()
23399 .mr(1)
23400 .nr(4)
23401 .kr(8)
23402 .sr(1)
23403 .m(1)
23404 .n(4)
23405 .k(8)
23406 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023407 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023408 }
23409
Marat Dukhan801d2c22021-06-02 21:25:05 -070023410 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023411 TEST_REQUIRES_X86_SSE2;
23412 GemmMicrokernelTester()
23413 .mr(1)
23414 .nr(4)
23415 .kr(8)
23416 .sr(1)
23417 .m(1)
23418 .n(4)
23419 .k(8)
23420 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080023421 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023422 }
23423#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23424
23425
23426#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070023427 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023428 TEST_REQUIRES_X86_SSE2;
23429 GemmMicrokernelTester()
23430 .mr(2)
23431 .nr(4)
23432 .kr(8)
23433 .sr(1)
23434 .m(2)
23435 .n(4)
23436 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080023437 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023438 }
23439
Marat Dukhan801d2c22021-06-02 21:25:05 -070023440 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023441 TEST_REQUIRES_X86_SSE2;
23442 GemmMicrokernelTester()
23443 .mr(2)
23444 .nr(4)
23445 .kr(8)
23446 .sr(1)
23447 .m(2)
23448 .n(4)
23449 .k(8)
23450 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080023451 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023452 }
23453
Marat Dukhan801d2c22021-06-02 21:25:05 -070023454 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023455 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080023456 for (uint32_t n = 1; n <= 4; n++) {
23457 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023458 GemmMicrokernelTester()
23459 .mr(2)
23460 .nr(4)
23461 .kr(8)
23462 .sr(1)
23463 .m(m)
23464 .n(n)
23465 .k(8)
23466 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023467 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023468 }
23469 }
23470 }
23471
Marat Dukhan801d2c22021-06-02 21:25:05 -070023472 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023473 TEST_REQUIRES_X86_SSE2;
23474 for (uint32_t m = 1; m <= 2; m++) {
23475 GemmMicrokernelTester()
23476 .mr(2)
23477 .nr(4)
23478 .kr(8)
23479 .sr(1)
23480 .m(m)
23481 .n(4)
23482 .k(8)
23483 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023484 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023485 }
23486 }
23487
Marat Dukhan801d2c22021-06-02 21:25:05 -070023488 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023489 TEST_REQUIRES_X86_SSE2;
23490 for (uint32_t n = 1; n <= 4; n++) {
23491 GemmMicrokernelTester()
23492 .mr(2)
23493 .nr(4)
23494 .kr(8)
23495 .sr(1)
23496 .m(2)
23497 .n(n)
23498 .k(8)
23499 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023500 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023501 }
23502 }
23503
Marat Dukhan801d2c22021-06-02 21:25:05 -070023504 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023505 TEST_REQUIRES_X86_SSE2;
23506 for (size_t k = 1; k < 8; k++) {
23507 GemmMicrokernelTester()
23508 .mr(2)
23509 .nr(4)
23510 .kr(8)
23511 .sr(1)
23512 .m(2)
23513 .n(4)
23514 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023515 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023516 }
23517 }
23518
Marat Dukhan801d2c22021-06-02 21:25:05 -070023519 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023520 TEST_REQUIRES_X86_SSE2;
23521 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023522 for (uint32_t n = 1; n <= 4; n++) {
23523 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023524 GemmMicrokernelTester()
23525 .mr(2)
23526 .nr(4)
23527 .kr(8)
23528 .sr(1)
23529 .m(m)
23530 .n(n)
23531 .k(k)
23532 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023533 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023534 }
23535 }
23536 }
23537 }
23538
Marat Dukhan801d2c22021-06-02 21:25:05 -070023539 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023540 TEST_REQUIRES_X86_SSE2;
23541 for (size_t k = 9; k < 16; k++) {
23542 GemmMicrokernelTester()
23543 .mr(2)
23544 .nr(4)
23545 .kr(8)
23546 .sr(1)
23547 .m(2)
23548 .n(4)
23549 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023550 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023551 }
23552 }
23553
Marat Dukhan801d2c22021-06-02 21:25:05 -070023554 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023555 TEST_REQUIRES_X86_SSE2;
23556 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023557 for (uint32_t n = 1; n <= 4; n++) {
23558 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023559 GemmMicrokernelTester()
23560 .mr(2)
23561 .nr(4)
23562 .kr(8)
23563 .sr(1)
23564 .m(m)
23565 .n(n)
23566 .k(k)
23567 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023568 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023569 }
23570 }
23571 }
23572 }
23573
Marat Dukhan801d2c22021-06-02 21:25:05 -070023574 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023575 TEST_REQUIRES_X86_SSE2;
23576 for (size_t k = 16; k <= 80; k += 8) {
23577 GemmMicrokernelTester()
23578 .mr(2)
23579 .nr(4)
23580 .kr(8)
23581 .sr(1)
23582 .m(2)
23583 .n(4)
23584 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023585 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023586 }
23587 }
23588
Marat Dukhan801d2c22021-06-02 21:25:05 -070023589 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023590 TEST_REQUIRES_X86_SSE2;
23591 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023592 for (uint32_t n = 1; n <= 4; n++) {
23593 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023594 GemmMicrokernelTester()
23595 .mr(2)
23596 .nr(4)
23597 .kr(8)
23598 .sr(1)
23599 .m(m)
23600 .n(n)
23601 .k(k)
23602 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023603 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023604 }
23605 }
23606 }
23607 }
23608
Marat Dukhan801d2c22021-06-02 21:25:05 -070023609 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023610 TEST_REQUIRES_X86_SSE2;
23611 for (uint32_t n = 5; n < 8; n++) {
23612 for (size_t k = 1; k <= 40; k += 9) {
23613 GemmMicrokernelTester()
23614 .mr(2)
23615 .nr(4)
23616 .kr(8)
23617 .sr(1)
23618 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023619 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070023620 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023621 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023622 }
23623 }
23624 }
23625
Marat Dukhan801d2c22021-06-02 21:25:05 -070023626 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023627 TEST_REQUIRES_X86_SSE2;
23628 for (uint32_t n = 5; n < 8; n++) {
23629 for (size_t k = 1; k <= 40; k += 9) {
23630 GemmMicrokernelTester()
23631 .mr(2)
23632 .nr(4)
23633 .kr(8)
23634 .sr(1)
23635 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023636 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070023637 .k(k)
23638 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080023639 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023640 }
23641 }
23642 }
23643
Marat Dukhan801d2c22021-06-02 21:25:05 -070023644 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023645 TEST_REQUIRES_X86_SSE2;
23646 for (uint32_t n = 5; n < 8; n++) {
23647 for (size_t k = 1; k <= 40; k += 9) {
23648 for (uint32_t m = 1; m <= 2; m++) {
23649 GemmMicrokernelTester()
23650 .mr(2)
23651 .nr(4)
23652 .kr(8)
23653 .sr(1)
23654 .m(m)
23655 .n(n)
23656 .k(k)
23657 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023658 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023659 }
23660 }
23661 }
23662 }
23663
Marat Dukhan801d2c22021-06-02 21:25:05 -070023664 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023665 TEST_REQUIRES_X86_SSE2;
23666 for (uint32_t n = 8; n <= 12; n += 4) {
23667 for (size_t k = 1; k <= 40; k += 9) {
23668 GemmMicrokernelTester()
23669 .mr(2)
23670 .nr(4)
23671 .kr(8)
23672 .sr(1)
23673 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023674 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070023675 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023676 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023677 }
23678 }
23679 }
23680
Marat Dukhan801d2c22021-06-02 21:25:05 -070023681 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023682 TEST_REQUIRES_X86_SSE2;
23683 for (uint32_t n = 8; n <= 12; n += 4) {
23684 for (size_t k = 1; k <= 40; k += 9) {
23685 GemmMicrokernelTester()
23686 .mr(2)
23687 .nr(4)
23688 .kr(8)
23689 .sr(1)
23690 .m(2)
23691 .n(n)
23692 .k(k)
23693 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080023694 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023695 }
23696 }
23697 }
23698
Marat Dukhan801d2c22021-06-02 21:25:05 -070023699 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023700 TEST_REQUIRES_X86_SSE2;
23701 for (uint32_t n = 8; n <= 12; n += 4) {
23702 for (size_t k = 1; k <= 40; k += 9) {
23703 for (uint32_t m = 1; m <= 2; m++) {
23704 GemmMicrokernelTester()
23705 .mr(2)
23706 .nr(4)
23707 .kr(8)
23708 .sr(1)
23709 .m(m)
23710 .n(n)
23711 .k(k)
23712 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023713 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023714 }
23715 }
23716 }
23717 }
23718
Marat Dukhan801d2c22021-06-02 21:25:05 -070023719 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, small_kernel) {
23720 TEST_REQUIRES_X86_SSE2;
23721 for (size_t k = 1; k <= 40; k += 9) {
23722 GemmMicrokernelTester()
23723 .mr(2)
23724 .nr(4)
23725 .kr(8)
23726 .sr(1)
23727 .m(2)
23728 .n(4)
23729 .k(k)
23730 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080023731 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070023732 }
23733 }
23734
23735 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, small_kernel_subtile) {
23736 TEST_REQUIRES_X86_SSE2;
23737 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023738 for (uint32_t n = 1; n <= 4; n++) {
23739 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070023740 GemmMicrokernelTester()
23741 .mr(2)
23742 .nr(4)
23743 .kr(8)
23744 .sr(1)
23745 .m(m)
23746 .n(n)
23747 .k(k)
23748 .ks(3)
23749 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023750 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070023751 }
23752 }
23753 }
23754 }
23755
23756 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_small_kernel) {
23757 TEST_REQUIRES_X86_SSE2;
23758 for (uint32_t n = 5; n < 8; n++) {
23759 for (size_t k = 1; k <= 40; k += 9) {
23760 GemmMicrokernelTester()
23761 .mr(2)
23762 .nr(4)
23763 .kr(8)
23764 .sr(1)
23765 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023766 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070023767 .k(k)
23768 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080023769 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070023770 }
23771 }
23772 }
23773
23774 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_small_kernel) {
23775 TEST_REQUIRES_X86_SSE2;
23776 for (uint32_t n = 8; n <= 12; n += 4) {
23777 for (size_t k = 1; k <= 40; k += 9) {
23778 GemmMicrokernelTester()
23779 .mr(2)
23780 .nr(4)
23781 .kr(8)
23782 .sr(1)
23783 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023784 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070023785 .k(k)
23786 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080023787 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070023788 }
23789 }
23790 }
23791
23792 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023793 TEST_REQUIRES_X86_SSE2;
23794 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023795 for (uint32_t n = 1; n <= 4; n++) {
23796 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023797 GemmMicrokernelTester()
23798 .mr(2)
23799 .nr(4)
23800 .kr(8)
23801 .sr(1)
23802 .m(m)
23803 .n(n)
23804 .k(k)
23805 .cm_stride(7)
23806 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023807 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023808 }
23809 }
23810 }
23811 }
23812
Marat Dukhan801d2c22021-06-02 21:25:05 -070023813 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, a_offset) {
23814 TEST_REQUIRES_X86_SSE2;
23815 for (size_t k = 1; k <= 40; k += 9) {
23816 GemmMicrokernelTester()
23817 .mr(2)
23818 .nr(4)
23819 .kr(8)
23820 .sr(1)
23821 .m(2)
23822 .n(4)
23823 .k(k)
23824 .ks(3)
23825 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080023826 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070023827 }
23828 }
23829
23830 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, zero) {
23831 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080023832 for (size_t k = 1; k <= 40; k += 9) {
23833 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070023834 GemmMicrokernelTester()
23835 .mr(2)
23836 .nr(4)
23837 .kr(8)
23838 .sr(1)
23839 .m(2)
23840 .n(4)
23841 .k(k)
23842 .ks(3)
23843 .a_offset(83)
23844 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080023845 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070023846 }
23847 }
23848 }
23849
23850 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023851 TEST_REQUIRES_X86_SSE2;
23852 GemmMicrokernelTester()
23853 .mr(2)
23854 .nr(4)
23855 .kr(8)
23856 .sr(1)
23857 .m(2)
23858 .n(4)
23859 .k(8)
23860 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023861 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023862 }
23863
Marat Dukhan801d2c22021-06-02 21:25:05 -070023864 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023865 TEST_REQUIRES_X86_SSE2;
23866 GemmMicrokernelTester()
23867 .mr(2)
23868 .nr(4)
23869 .kr(8)
23870 .sr(1)
23871 .m(2)
23872 .n(4)
23873 .k(8)
23874 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023875 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023876 }
23877
Marat Dukhan801d2c22021-06-02 21:25:05 -070023878 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023879 TEST_REQUIRES_X86_SSE2;
23880 GemmMicrokernelTester()
23881 .mr(2)
23882 .nr(4)
23883 .kr(8)
23884 .sr(1)
23885 .m(2)
23886 .n(4)
23887 .k(8)
23888 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080023889 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023890 }
23891#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23892
23893
23894#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070023895 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023896 TEST_REQUIRES_X86_SSSE3;
23897 GemmMicrokernelTester()
23898 .mr(3)
23899 .nr(4)
23900 .kr(8)
23901 .sr(1)
23902 .m(3)
23903 .n(4)
23904 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080023905 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023906 }
23907
Marat Dukhan801d2c22021-06-02 21:25:05 -070023908 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023909 TEST_REQUIRES_X86_SSSE3;
23910 GemmMicrokernelTester()
23911 .mr(3)
23912 .nr(4)
23913 .kr(8)
23914 .sr(1)
23915 .m(3)
23916 .n(4)
23917 .k(8)
23918 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080023919 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023920 }
23921
Marat Dukhan801d2c22021-06-02 21:25:05 -070023922 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023923 TEST_REQUIRES_X86_SSSE3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080023924 for (uint32_t n = 1; n <= 4; n++) {
23925 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023926 GemmMicrokernelTester()
23927 .mr(3)
23928 .nr(4)
23929 .kr(8)
23930 .sr(1)
23931 .m(m)
23932 .n(n)
23933 .k(8)
23934 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023935 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023936 }
23937 }
23938 }
23939
Marat Dukhan801d2c22021-06-02 21:25:05 -070023940 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023941 TEST_REQUIRES_X86_SSSE3;
23942 for (uint32_t m = 1; m <= 3; m++) {
23943 GemmMicrokernelTester()
23944 .mr(3)
23945 .nr(4)
23946 .kr(8)
23947 .sr(1)
23948 .m(m)
23949 .n(4)
23950 .k(8)
23951 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023952 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023953 }
23954 }
23955
Marat Dukhan801d2c22021-06-02 21:25:05 -070023956 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023957 TEST_REQUIRES_X86_SSSE3;
23958 for (uint32_t n = 1; n <= 4; n++) {
23959 GemmMicrokernelTester()
23960 .mr(3)
23961 .nr(4)
23962 .kr(8)
23963 .sr(1)
23964 .m(3)
23965 .n(n)
23966 .k(8)
23967 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023968 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023969 }
23970 }
23971
Marat Dukhan801d2c22021-06-02 21:25:05 -070023972 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023973 TEST_REQUIRES_X86_SSSE3;
23974 for (size_t k = 1; k < 8; k++) {
23975 GemmMicrokernelTester()
23976 .mr(3)
23977 .nr(4)
23978 .kr(8)
23979 .sr(1)
23980 .m(3)
23981 .n(4)
23982 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023983 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023984 }
23985 }
23986
Marat Dukhan801d2c22021-06-02 21:25:05 -070023987 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023988 TEST_REQUIRES_X86_SSSE3;
23989 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023990 for (uint32_t n = 1; n <= 4; n++) {
23991 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023992 GemmMicrokernelTester()
23993 .mr(3)
23994 .nr(4)
23995 .kr(8)
23996 .sr(1)
23997 .m(m)
23998 .n(n)
23999 .k(k)
24000 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024001 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024002 }
24003 }
24004 }
24005 }
24006
Marat Dukhan801d2c22021-06-02 21:25:05 -070024007 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024008 TEST_REQUIRES_X86_SSSE3;
24009 for (size_t k = 9; k < 16; k++) {
24010 GemmMicrokernelTester()
24011 .mr(3)
24012 .nr(4)
24013 .kr(8)
24014 .sr(1)
24015 .m(3)
24016 .n(4)
24017 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024018 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024019 }
24020 }
24021
Marat Dukhan801d2c22021-06-02 21:25:05 -070024022 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024023 TEST_REQUIRES_X86_SSSE3;
24024 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024025 for (uint32_t n = 1; n <= 4; n++) {
24026 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024027 GemmMicrokernelTester()
24028 .mr(3)
24029 .nr(4)
24030 .kr(8)
24031 .sr(1)
24032 .m(m)
24033 .n(n)
24034 .k(k)
24035 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024036 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024037 }
24038 }
24039 }
24040 }
24041
Marat Dukhan801d2c22021-06-02 21:25:05 -070024042 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024043 TEST_REQUIRES_X86_SSSE3;
24044 for (size_t k = 16; k <= 80; k += 8) {
24045 GemmMicrokernelTester()
24046 .mr(3)
24047 .nr(4)
24048 .kr(8)
24049 .sr(1)
24050 .m(3)
24051 .n(4)
24052 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024053 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024054 }
24055 }
24056
Marat Dukhan801d2c22021-06-02 21:25:05 -070024057 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024058 TEST_REQUIRES_X86_SSSE3;
24059 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024060 for (uint32_t n = 1; n <= 4; n++) {
24061 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024062 GemmMicrokernelTester()
24063 .mr(3)
24064 .nr(4)
24065 .kr(8)
24066 .sr(1)
24067 .m(m)
24068 .n(n)
24069 .k(k)
24070 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024071 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024072 }
24073 }
24074 }
24075 }
24076
Marat Dukhan801d2c22021-06-02 21:25:05 -070024077 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024078 TEST_REQUIRES_X86_SSSE3;
24079 for (uint32_t n = 5; n < 8; n++) {
24080 for (size_t k = 1; k <= 40; k += 9) {
24081 GemmMicrokernelTester()
24082 .mr(3)
24083 .nr(4)
24084 .kr(8)
24085 .sr(1)
24086 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024087 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070024088 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024089 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024090 }
24091 }
24092 }
24093
Marat Dukhan801d2c22021-06-02 21:25:05 -070024094 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024095 TEST_REQUIRES_X86_SSSE3;
24096 for (uint32_t n = 5; n < 8; n++) {
24097 for (size_t k = 1; k <= 40; k += 9) {
24098 GemmMicrokernelTester()
24099 .mr(3)
24100 .nr(4)
24101 .kr(8)
24102 .sr(1)
24103 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024104 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070024105 .k(k)
24106 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080024107 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024108 }
24109 }
24110 }
24111
Marat Dukhan801d2c22021-06-02 21:25:05 -070024112 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024113 TEST_REQUIRES_X86_SSSE3;
24114 for (uint32_t n = 5; n < 8; n++) {
24115 for (size_t k = 1; k <= 40; k += 9) {
24116 for (uint32_t m = 1; m <= 3; m++) {
24117 GemmMicrokernelTester()
24118 .mr(3)
24119 .nr(4)
24120 .kr(8)
24121 .sr(1)
24122 .m(m)
24123 .n(n)
24124 .k(k)
24125 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024126 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024127 }
24128 }
24129 }
24130 }
24131
Marat Dukhan801d2c22021-06-02 21:25:05 -070024132 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024133 TEST_REQUIRES_X86_SSSE3;
24134 for (uint32_t n = 8; n <= 12; n += 4) {
24135 for (size_t k = 1; k <= 40; k += 9) {
24136 GemmMicrokernelTester()
24137 .mr(3)
24138 .nr(4)
24139 .kr(8)
24140 .sr(1)
24141 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024142 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070024143 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024144 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024145 }
24146 }
24147 }
24148
Marat Dukhan801d2c22021-06-02 21:25:05 -070024149 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024150 TEST_REQUIRES_X86_SSSE3;
24151 for (uint32_t n = 8; n <= 12; n += 4) {
24152 for (size_t k = 1; k <= 40; k += 9) {
24153 GemmMicrokernelTester()
24154 .mr(3)
24155 .nr(4)
24156 .kr(8)
24157 .sr(1)
24158 .m(3)
24159 .n(n)
24160 .k(k)
24161 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080024162 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024163 }
24164 }
24165 }
24166
Marat Dukhan801d2c22021-06-02 21:25:05 -070024167 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024168 TEST_REQUIRES_X86_SSSE3;
24169 for (uint32_t n = 8; n <= 12; n += 4) {
24170 for (size_t k = 1; k <= 40; k += 9) {
24171 for (uint32_t m = 1; m <= 3; m++) {
24172 GemmMicrokernelTester()
24173 .mr(3)
24174 .nr(4)
24175 .kr(8)
24176 .sr(1)
24177 .m(m)
24178 .n(n)
24179 .k(k)
24180 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024181 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024182 }
24183 }
24184 }
24185 }
24186
Marat Dukhan801d2c22021-06-02 21:25:05 -070024187 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, small_kernel) {
24188 TEST_REQUIRES_X86_SSSE3;
24189 for (size_t k = 1; k <= 40; k += 9) {
24190 GemmMicrokernelTester()
24191 .mr(3)
24192 .nr(4)
24193 .kr(8)
24194 .sr(1)
24195 .m(3)
24196 .n(4)
24197 .k(k)
24198 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080024199 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070024200 }
24201 }
24202
24203 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, small_kernel_subtile) {
24204 TEST_REQUIRES_X86_SSSE3;
24205 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024206 for (uint32_t n = 1; n <= 4; n++) {
24207 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070024208 GemmMicrokernelTester()
24209 .mr(3)
24210 .nr(4)
24211 .kr(8)
24212 .sr(1)
24213 .m(m)
24214 .n(n)
24215 .k(k)
24216 .ks(3)
24217 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024218 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070024219 }
24220 }
24221 }
24222 }
24223
24224 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, n_gt_4_small_kernel) {
24225 TEST_REQUIRES_X86_SSSE3;
24226 for (uint32_t n = 5; n < 8; n++) {
24227 for (size_t k = 1; k <= 40; k += 9) {
24228 GemmMicrokernelTester()
24229 .mr(3)
24230 .nr(4)
24231 .kr(8)
24232 .sr(1)
24233 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024234 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070024235 .k(k)
24236 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080024237 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070024238 }
24239 }
24240 }
24241
24242 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, n_div_4_small_kernel) {
24243 TEST_REQUIRES_X86_SSSE3;
24244 for (uint32_t n = 8; n <= 12; n += 4) {
24245 for (size_t k = 1; k <= 40; k += 9) {
24246 GemmMicrokernelTester()
24247 .mr(3)
24248 .nr(4)
24249 .kr(8)
24250 .sr(1)
24251 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024252 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070024253 .k(k)
24254 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080024255 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070024256 }
24257 }
24258 }
24259
24260 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024261 TEST_REQUIRES_X86_SSSE3;
24262 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024263 for (uint32_t n = 1; n <= 4; n++) {
24264 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024265 GemmMicrokernelTester()
24266 .mr(3)
24267 .nr(4)
24268 .kr(8)
24269 .sr(1)
24270 .m(m)
24271 .n(n)
24272 .k(k)
24273 .cm_stride(7)
24274 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024275 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024276 }
24277 }
24278 }
24279 }
24280
Marat Dukhan801d2c22021-06-02 21:25:05 -070024281 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, a_offset) {
24282 TEST_REQUIRES_X86_SSSE3;
24283 for (size_t k = 1; k <= 40; k += 9) {
24284 GemmMicrokernelTester()
24285 .mr(3)
24286 .nr(4)
24287 .kr(8)
24288 .sr(1)
24289 .m(3)
24290 .n(4)
24291 .k(k)
24292 .ks(3)
24293 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080024294 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070024295 }
24296 }
24297
24298 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, zero) {
24299 TEST_REQUIRES_X86_SSSE3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080024300 for (size_t k = 1; k <= 40; k += 9) {
24301 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070024302 GemmMicrokernelTester()
24303 .mr(3)
24304 .nr(4)
24305 .kr(8)
24306 .sr(1)
24307 .m(3)
24308 .n(4)
24309 .k(k)
24310 .ks(3)
24311 .a_offset(127)
24312 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080024313 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070024314 }
24315 }
24316 }
24317
24318 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024319 TEST_REQUIRES_X86_SSSE3;
24320 GemmMicrokernelTester()
24321 .mr(3)
24322 .nr(4)
24323 .kr(8)
24324 .sr(1)
24325 .m(3)
24326 .n(4)
24327 .k(8)
24328 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024329 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024330 }
24331
Marat Dukhan801d2c22021-06-02 21:25:05 -070024332 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024333 TEST_REQUIRES_X86_SSSE3;
24334 GemmMicrokernelTester()
24335 .mr(3)
24336 .nr(4)
24337 .kr(8)
24338 .sr(1)
24339 .m(3)
24340 .n(4)
24341 .k(8)
24342 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024343 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024344 }
24345
Marat Dukhan801d2c22021-06-02 21:25:05 -070024346 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024347 TEST_REQUIRES_X86_SSSE3;
24348 GemmMicrokernelTester()
24349 .mr(3)
24350 .nr(4)
24351 .kr(8)
24352 .sr(1)
24353 .m(3)
24354 .n(4)
24355 .k(8)
24356 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080024357 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024358 }
24359#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24360
24361
24362#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070024363 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024364 TEST_REQUIRES_X86_SSE41;
24365 GemmMicrokernelTester()
24366 .mr(3)
24367 .nr(4)
24368 .kr(8)
24369 .sr(1)
24370 .m(3)
24371 .n(4)
24372 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080024373 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024374 }
24375
Marat Dukhan801d2c22021-06-02 21:25:05 -070024376 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024377 TEST_REQUIRES_X86_SSE41;
24378 GemmMicrokernelTester()
24379 .mr(3)
24380 .nr(4)
24381 .kr(8)
24382 .sr(1)
24383 .m(3)
24384 .n(4)
24385 .k(8)
24386 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080024387 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024388 }
24389
Marat Dukhan801d2c22021-06-02 21:25:05 -070024390 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024391 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080024392 for (uint32_t n = 1; n <= 4; n++) {
24393 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024394 GemmMicrokernelTester()
24395 .mr(3)
24396 .nr(4)
24397 .kr(8)
24398 .sr(1)
24399 .m(m)
24400 .n(n)
24401 .k(8)
24402 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024403 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024404 }
24405 }
24406 }
24407
Marat Dukhan801d2c22021-06-02 21:25:05 -070024408 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024409 TEST_REQUIRES_X86_SSE41;
24410 for (uint32_t m = 1; m <= 3; m++) {
24411 GemmMicrokernelTester()
24412 .mr(3)
24413 .nr(4)
24414 .kr(8)
24415 .sr(1)
24416 .m(m)
24417 .n(4)
24418 .k(8)
24419 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024420 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024421 }
24422 }
24423
Marat Dukhan801d2c22021-06-02 21:25:05 -070024424 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024425 TEST_REQUIRES_X86_SSE41;
24426 for (uint32_t n = 1; n <= 4; n++) {
24427 GemmMicrokernelTester()
24428 .mr(3)
24429 .nr(4)
24430 .kr(8)
24431 .sr(1)
24432 .m(3)
24433 .n(n)
24434 .k(8)
24435 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024436 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024437 }
24438 }
24439
Marat Dukhan801d2c22021-06-02 21:25:05 -070024440 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024441 TEST_REQUIRES_X86_SSE41;
24442 for (size_t k = 1; k < 8; k++) {
24443 GemmMicrokernelTester()
24444 .mr(3)
24445 .nr(4)
24446 .kr(8)
24447 .sr(1)
24448 .m(3)
24449 .n(4)
24450 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024451 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024452 }
24453 }
24454
Marat Dukhan801d2c22021-06-02 21:25:05 -070024455 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024456 TEST_REQUIRES_X86_SSE41;
24457 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024458 for (uint32_t n = 1; n <= 4; n++) {
24459 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024460 GemmMicrokernelTester()
24461 .mr(3)
24462 .nr(4)
24463 .kr(8)
24464 .sr(1)
24465 .m(m)
24466 .n(n)
24467 .k(k)
24468 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024469 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024470 }
24471 }
24472 }
24473 }
24474
Marat Dukhan801d2c22021-06-02 21:25:05 -070024475 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024476 TEST_REQUIRES_X86_SSE41;
24477 for (size_t k = 9; k < 16; k++) {
24478 GemmMicrokernelTester()
24479 .mr(3)
24480 .nr(4)
24481 .kr(8)
24482 .sr(1)
24483 .m(3)
24484 .n(4)
24485 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024486 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024487 }
24488 }
24489
Marat Dukhan801d2c22021-06-02 21:25:05 -070024490 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024491 TEST_REQUIRES_X86_SSE41;
24492 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024493 for (uint32_t n = 1; n <= 4; n++) {
24494 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024495 GemmMicrokernelTester()
24496 .mr(3)
24497 .nr(4)
24498 .kr(8)
24499 .sr(1)
24500 .m(m)
24501 .n(n)
24502 .k(k)
24503 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024504 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024505 }
24506 }
24507 }
24508 }
24509
Marat Dukhan801d2c22021-06-02 21:25:05 -070024510 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024511 TEST_REQUIRES_X86_SSE41;
24512 for (size_t k = 16; k <= 80; k += 8) {
24513 GemmMicrokernelTester()
24514 .mr(3)
24515 .nr(4)
24516 .kr(8)
24517 .sr(1)
24518 .m(3)
24519 .n(4)
24520 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024521 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024522 }
24523 }
24524
Marat Dukhan801d2c22021-06-02 21:25:05 -070024525 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024526 TEST_REQUIRES_X86_SSE41;
24527 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024528 for (uint32_t n = 1; n <= 4; n++) {
24529 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024530 GemmMicrokernelTester()
24531 .mr(3)
24532 .nr(4)
24533 .kr(8)
24534 .sr(1)
24535 .m(m)
24536 .n(n)
24537 .k(k)
24538 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024539 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024540 }
24541 }
24542 }
24543 }
24544
Marat Dukhan801d2c22021-06-02 21:25:05 -070024545 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024546 TEST_REQUIRES_X86_SSE41;
24547 for (uint32_t n = 5; n < 8; n++) {
24548 for (size_t k = 1; k <= 40; k += 9) {
24549 GemmMicrokernelTester()
24550 .mr(3)
24551 .nr(4)
24552 .kr(8)
24553 .sr(1)
24554 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024555 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070024556 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024557 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024558 }
24559 }
24560 }
24561
Marat Dukhan801d2c22021-06-02 21:25:05 -070024562 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024563 TEST_REQUIRES_X86_SSE41;
24564 for (uint32_t n = 5; n < 8; n++) {
24565 for (size_t k = 1; k <= 40; k += 9) {
24566 GemmMicrokernelTester()
24567 .mr(3)
24568 .nr(4)
24569 .kr(8)
24570 .sr(1)
24571 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024572 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070024573 .k(k)
24574 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080024575 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024576 }
24577 }
24578 }
24579
Marat Dukhan801d2c22021-06-02 21:25:05 -070024580 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024581 TEST_REQUIRES_X86_SSE41;
24582 for (uint32_t n = 5; n < 8; n++) {
24583 for (size_t k = 1; k <= 40; k += 9) {
24584 for (uint32_t m = 1; m <= 3; m++) {
24585 GemmMicrokernelTester()
24586 .mr(3)
24587 .nr(4)
24588 .kr(8)
24589 .sr(1)
24590 .m(m)
24591 .n(n)
24592 .k(k)
24593 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024594 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024595 }
24596 }
24597 }
24598 }
24599
Marat Dukhan801d2c22021-06-02 21:25:05 -070024600 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024601 TEST_REQUIRES_X86_SSE41;
24602 for (uint32_t n = 8; n <= 12; n += 4) {
24603 for (size_t k = 1; k <= 40; k += 9) {
24604 GemmMicrokernelTester()
24605 .mr(3)
24606 .nr(4)
24607 .kr(8)
24608 .sr(1)
24609 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024610 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070024611 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024612 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024613 }
24614 }
24615 }
24616
Marat Dukhan801d2c22021-06-02 21:25:05 -070024617 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024618 TEST_REQUIRES_X86_SSE41;
24619 for (uint32_t n = 8; n <= 12; n += 4) {
24620 for (size_t k = 1; k <= 40; k += 9) {
24621 GemmMicrokernelTester()
24622 .mr(3)
24623 .nr(4)
24624 .kr(8)
24625 .sr(1)
24626 .m(3)
24627 .n(n)
24628 .k(k)
24629 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080024630 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024631 }
24632 }
24633 }
24634
Marat Dukhan801d2c22021-06-02 21:25:05 -070024635 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024636 TEST_REQUIRES_X86_SSE41;
24637 for (uint32_t n = 8; n <= 12; n += 4) {
24638 for (size_t k = 1; k <= 40; k += 9) {
24639 for (uint32_t m = 1; m <= 3; m++) {
24640 GemmMicrokernelTester()
24641 .mr(3)
24642 .nr(4)
24643 .kr(8)
24644 .sr(1)
24645 .m(m)
24646 .n(n)
24647 .k(k)
24648 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024649 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024650 }
24651 }
24652 }
24653 }
24654
Marat Dukhan801d2c22021-06-02 21:25:05 -070024655 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, small_kernel) {
24656 TEST_REQUIRES_X86_SSE41;
24657 for (size_t k = 1; k <= 40; k += 9) {
24658 GemmMicrokernelTester()
24659 .mr(3)
24660 .nr(4)
24661 .kr(8)
24662 .sr(1)
24663 .m(3)
24664 .n(4)
24665 .k(k)
24666 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080024667 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070024668 }
24669 }
24670
24671 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, small_kernel_subtile) {
24672 TEST_REQUIRES_X86_SSE41;
24673 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024674 for (uint32_t n = 1; n <= 4; n++) {
24675 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070024676 GemmMicrokernelTester()
24677 .mr(3)
24678 .nr(4)
24679 .kr(8)
24680 .sr(1)
24681 .m(m)
24682 .n(n)
24683 .k(k)
24684 .ks(3)
24685 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024686 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070024687 }
24688 }
24689 }
24690 }
24691
24692 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_small_kernel) {
24693 TEST_REQUIRES_X86_SSE41;
24694 for (uint32_t n = 5; n < 8; n++) {
24695 for (size_t k = 1; k <= 40; k += 9) {
24696 GemmMicrokernelTester()
24697 .mr(3)
24698 .nr(4)
24699 .kr(8)
24700 .sr(1)
24701 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024702 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070024703 .k(k)
24704 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080024705 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070024706 }
24707 }
24708 }
24709
24710 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_small_kernel) {
24711 TEST_REQUIRES_X86_SSE41;
24712 for (uint32_t n = 8; n <= 12; n += 4) {
24713 for (size_t k = 1; k <= 40; k += 9) {
24714 GemmMicrokernelTester()
24715 .mr(3)
24716 .nr(4)
24717 .kr(8)
24718 .sr(1)
24719 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024720 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070024721 .k(k)
24722 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080024723 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070024724 }
24725 }
24726 }
24727
24728 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024729 TEST_REQUIRES_X86_SSE41;
24730 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024731 for (uint32_t n = 1; n <= 4; n++) {
24732 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024733 GemmMicrokernelTester()
24734 .mr(3)
24735 .nr(4)
24736 .kr(8)
24737 .sr(1)
24738 .m(m)
24739 .n(n)
24740 .k(k)
24741 .cm_stride(7)
24742 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024743 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024744 }
24745 }
24746 }
24747 }
24748
Marat Dukhan801d2c22021-06-02 21:25:05 -070024749 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, a_offset) {
24750 TEST_REQUIRES_X86_SSE41;
24751 for (size_t k = 1; k <= 40; k += 9) {
24752 GemmMicrokernelTester()
24753 .mr(3)
24754 .nr(4)
24755 .kr(8)
24756 .sr(1)
24757 .m(3)
24758 .n(4)
24759 .k(k)
24760 .ks(3)
24761 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080024762 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070024763 }
24764 }
24765
24766 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, zero) {
24767 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080024768 for (size_t k = 1; k <= 40; k += 9) {
24769 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070024770 GemmMicrokernelTester()
24771 .mr(3)
24772 .nr(4)
24773 .kr(8)
24774 .sr(1)
24775 .m(3)
24776 .n(4)
24777 .k(k)
24778 .ks(3)
24779 .a_offset(127)
24780 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080024781 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070024782 }
24783 }
24784 }
24785
24786 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024787 TEST_REQUIRES_X86_SSE41;
24788 GemmMicrokernelTester()
24789 .mr(3)
24790 .nr(4)
24791 .kr(8)
24792 .sr(1)
24793 .m(3)
24794 .n(4)
24795 .k(8)
24796 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024797 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024798 }
24799
Marat Dukhan801d2c22021-06-02 21:25:05 -070024800 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024801 TEST_REQUIRES_X86_SSE41;
24802 GemmMicrokernelTester()
24803 .mr(3)
24804 .nr(4)
24805 .kr(8)
24806 .sr(1)
24807 .m(3)
24808 .n(4)
24809 .k(8)
24810 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024811 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024812 }
24813
Marat Dukhan801d2c22021-06-02 21:25:05 -070024814 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024815 TEST_REQUIRES_X86_SSE41;
24816 GemmMicrokernelTester()
24817 .mr(3)
24818 .nr(4)
24819 .kr(8)
24820 .sr(1)
24821 .m(3)
24822 .n(4)
24823 .k(8)
24824 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080024825 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024826 }
24827#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24828
24829
24830#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070024831 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024832 TEST_REQUIRES_X86_AVX;
24833 GemmMicrokernelTester()
24834 .mr(2)
24835 .nr(4)
24836 .kr(8)
24837 .sr(1)
24838 .m(2)
24839 .n(4)
24840 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080024841 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024842 }
24843
Marat Dukhan801d2c22021-06-02 21:25:05 -070024844 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024845 TEST_REQUIRES_X86_AVX;
24846 GemmMicrokernelTester()
24847 .mr(2)
24848 .nr(4)
24849 .kr(8)
24850 .sr(1)
24851 .m(2)
24852 .n(4)
24853 .k(8)
24854 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080024855 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024856 }
24857
Marat Dukhan801d2c22021-06-02 21:25:05 -070024858 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024859 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080024860 for (uint32_t n = 1; n <= 4; n++) {
24861 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024862 GemmMicrokernelTester()
24863 .mr(2)
24864 .nr(4)
24865 .kr(8)
24866 .sr(1)
24867 .m(m)
24868 .n(n)
24869 .k(8)
24870 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024871 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024872 }
24873 }
24874 }
24875
Marat Dukhan801d2c22021-06-02 21:25:05 -070024876 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024877 TEST_REQUIRES_X86_AVX;
24878 for (uint32_t m = 1; m <= 2; m++) {
24879 GemmMicrokernelTester()
24880 .mr(2)
24881 .nr(4)
24882 .kr(8)
24883 .sr(1)
24884 .m(m)
24885 .n(4)
24886 .k(8)
24887 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024888 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024889 }
24890 }
24891
Marat Dukhan801d2c22021-06-02 21:25:05 -070024892 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024893 TEST_REQUIRES_X86_AVX;
24894 for (uint32_t n = 1; n <= 4; n++) {
24895 GemmMicrokernelTester()
24896 .mr(2)
24897 .nr(4)
24898 .kr(8)
24899 .sr(1)
24900 .m(2)
24901 .n(n)
24902 .k(8)
24903 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024904 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024905 }
24906 }
24907
Marat Dukhan801d2c22021-06-02 21:25:05 -070024908 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024909 TEST_REQUIRES_X86_AVX;
24910 for (size_t k = 1; k < 8; k++) {
24911 GemmMicrokernelTester()
24912 .mr(2)
24913 .nr(4)
24914 .kr(8)
24915 .sr(1)
24916 .m(2)
24917 .n(4)
24918 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024919 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024920 }
24921 }
24922
Marat Dukhan801d2c22021-06-02 21:25:05 -070024923 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024924 TEST_REQUIRES_X86_AVX;
24925 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024926 for (uint32_t n = 1; n <= 4; n++) {
24927 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024928 GemmMicrokernelTester()
24929 .mr(2)
24930 .nr(4)
24931 .kr(8)
24932 .sr(1)
24933 .m(m)
24934 .n(n)
24935 .k(k)
24936 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024937 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024938 }
24939 }
24940 }
24941 }
24942
Marat Dukhan801d2c22021-06-02 21:25:05 -070024943 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024944 TEST_REQUIRES_X86_AVX;
24945 for (size_t k = 9; k < 16; k++) {
24946 GemmMicrokernelTester()
24947 .mr(2)
24948 .nr(4)
24949 .kr(8)
24950 .sr(1)
24951 .m(2)
24952 .n(4)
24953 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024954 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024955 }
24956 }
24957
Marat Dukhan801d2c22021-06-02 21:25:05 -070024958 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024959 TEST_REQUIRES_X86_AVX;
24960 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024961 for (uint32_t n = 1; n <= 4; n++) {
24962 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024963 GemmMicrokernelTester()
24964 .mr(2)
24965 .nr(4)
24966 .kr(8)
24967 .sr(1)
24968 .m(m)
24969 .n(n)
24970 .k(k)
24971 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024972 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024973 }
24974 }
24975 }
24976 }
24977
Marat Dukhan801d2c22021-06-02 21:25:05 -070024978 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024979 TEST_REQUIRES_X86_AVX;
24980 for (size_t k = 16; k <= 80; k += 8) {
24981 GemmMicrokernelTester()
24982 .mr(2)
24983 .nr(4)
24984 .kr(8)
24985 .sr(1)
24986 .m(2)
24987 .n(4)
24988 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024989 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024990 }
24991 }
24992
Marat Dukhan801d2c22021-06-02 21:25:05 -070024993 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024994 TEST_REQUIRES_X86_AVX;
24995 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024996 for (uint32_t n = 1; n <= 4; n++) {
24997 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024998 GemmMicrokernelTester()
24999 .mr(2)
25000 .nr(4)
25001 .kr(8)
25002 .sr(1)
25003 .m(m)
25004 .n(n)
25005 .k(k)
25006 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025007 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025008 }
25009 }
25010 }
25011 }
25012
Marat Dukhan801d2c22021-06-02 21:25:05 -070025013 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025014 TEST_REQUIRES_X86_AVX;
25015 for (uint32_t n = 5; n < 8; n++) {
25016 for (size_t k = 1; k <= 40; k += 9) {
25017 GemmMicrokernelTester()
25018 .mr(2)
25019 .nr(4)
25020 .kr(8)
25021 .sr(1)
25022 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025023 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070025024 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025025 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025026 }
25027 }
25028 }
25029
Marat Dukhan801d2c22021-06-02 21:25:05 -070025030 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025031 TEST_REQUIRES_X86_AVX;
25032 for (uint32_t n = 5; n < 8; n++) {
25033 for (size_t k = 1; k <= 40; k += 9) {
25034 GemmMicrokernelTester()
25035 .mr(2)
25036 .nr(4)
25037 .kr(8)
25038 .sr(1)
25039 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025040 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070025041 .k(k)
25042 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025043 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025044 }
25045 }
25046 }
25047
Marat Dukhan801d2c22021-06-02 21:25:05 -070025048 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025049 TEST_REQUIRES_X86_AVX;
25050 for (uint32_t n = 5; n < 8; n++) {
25051 for (size_t k = 1; k <= 40; k += 9) {
25052 for (uint32_t m = 1; m <= 2; m++) {
25053 GemmMicrokernelTester()
25054 .mr(2)
25055 .nr(4)
25056 .kr(8)
25057 .sr(1)
25058 .m(m)
25059 .n(n)
25060 .k(k)
25061 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025062 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025063 }
25064 }
25065 }
25066 }
25067
Marat Dukhan801d2c22021-06-02 21:25:05 -070025068 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025069 TEST_REQUIRES_X86_AVX;
25070 for (uint32_t n = 8; n <= 12; n += 4) {
25071 for (size_t k = 1; k <= 40; k += 9) {
25072 GemmMicrokernelTester()
25073 .mr(2)
25074 .nr(4)
25075 .kr(8)
25076 .sr(1)
25077 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025078 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070025079 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025080 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025081 }
25082 }
25083 }
25084
Marat Dukhan801d2c22021-06-02 21:25:05 -070025085 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025086 TEST_REQUIRES_X86_AVX;
25087 for (uint32_t n = 8; n <= 12; n += 4) {
25088 for (size_t k = 1; k <= 40; k += 9) {
25089 GemmMicrokernelTester()
25090 .mr(2)
25091 .nr(4)
25092 .kr(8)
25093 .sr(1)
25094 .m(2)
25095 .n(n)
25096 .k(k)
25097 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025098 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025099 }
25100 }
25101 }
25102
Marat Dukhan801d2c22021-06-02 21:25:05 -070025103 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025104 TEST_REQUIRES_X86_AVX;
25105 for (uint32_t n = 8; n <= 12; n += 4) {
25106 for (size_t k = 1; k <= 40; k += 9) {
25107 for (uint32_t m = 1; m <= 2; m++) {
25108 GemmMicrokernelTester()
25109 .mr(2)
25110 .nr(4)
25111 .kr(8)
25112 .sr(1)
25113 .m(m)
25114 .n(n)
25115 .k(k)
25116 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025117 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025118 }
25119 }
25120 }
25121 }
25122
Marat Dukhan801d2c22021-06-02 21:25:05 -070025123 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, small_kernel) {
25124 TEST_REQUIRES_X86_AVX;
25125 for (size_t k = 1; k <= 40; k += 9) {
25126 GemmMicrokernelTester()
25127 .mr(2)
25128 .nr(4)
25129 .kr(8)
25130 .sr(1)
25131 .m(2)
25132 .n(4)
25133 .k(k)
25134 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080025135 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070025136 }
25137 }
25138
25139 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, small_kernel_subtile) {
25140 TEST_REQUIRES_X86_AVX;
25141 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025142 for (uint32_t n = 1; n <= 4; n++) {
25143 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070025144 GemmMicrokernelTester()
25145 .mr(2)
25146 .nr(4)
25147 .kr(8)
25148 .sr(1)
25149 .m(m)
25150 .n(n)
25151 .k(k)
25152 .ks(3)
25153 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025154 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070025155 }
25156 }
25157 }
25158 }
25159
25160 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_small_kernel) {
25161 TEST_REQUIRES_X86_AVX;
25162 for (uint32_t n = 5; n < 8; n++) {
25163 for (size_t k = 1; k <= 40; k += 9) {
25164 GemmMicrokernelTester()
25165 .mr(2)
25166 .nr(4)
25167 .kr(8)
25168 .sr(1)
25169 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025170 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070025171 .k(k)
25172 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080025173 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070025174 }
25175 }
25176 }
25177
25178 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_small_kernel) {
25179 TEST_REQUIRES_X86_AVX;
25180 for (uint32_t n = 8; n <= 12; n += 4) {
25181 for (size_t k = 1; k <= 40; k += 9) {
25182 GemmMicrokernelTester()
25183 .mr(2)
25184 .nr(4)
25185 .kr(8)
25186 .sr(1)
25187 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025188 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070025189 .k(k)
25190 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080025191 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070025192 }
25193 }
25194 }
25195
25196 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025197 TEST_REQUIRES_X86_AVX;
25198 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025199 for (uint32_t n = 1; n <= 4; n++) {
25200 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025201 GemmMicrokernelTester()
25202 .mr(2)
25203 .nr(4)
25204 .kr(8)
25205 .sr(1)
25206 .m(m)
25207 .n(n)
25208 .k(k)
25209 .cm_stride(7)
25210 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025211 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025212 }
25213 }
25214 }
25215 }
25216
Marat Dukhan801d2c22021-06-02 21:25:05 -070025217 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, a_offset) {
25218 TEST_REQUIRES_X86_AVX;
25219 for (size_t k = 1; k <= 40; k += 9) {
25220 GemmMicrokernelTester()
25221 .mr(2)
25222 .nr(4)
25223 .kr(8)
25224 .sr(1)
25225 .m(2)
25226 .n(4)
25227 .k(k)
25228 .ks(3)
25229 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080025230 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070025231 }
25232 }
25233
25234 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, zero) {
25235 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080025236 for (size_t k = 1; k <= 40; k += 9) {
25237 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070025238 GemmMicrokernelTester()
25239 .mr(2)
25240 .nr(4)
25241 .kr(8)
25242 .sr(1)
25243 .m(2)
25244 .n(4)
25245 .k(k)
25246 .ks(3)
25247 .a_offset(83)
25248 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080025249 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070025250 }
25251 }
25252 }
25253
25254 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025255 TEST_REQUIRES_X86_AVX;
25256 GemmMicrokernelTester()
25257 .mr(2)
25258 .nr(4)
25259 .kr(8)
25260 .sr(1)
25261 .m(2)
25262 .n(4)
25263 .k(8)
25264 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025265 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025266 }
25267
Marat Dukhan801d2c22021-06-02 21:25:05 -070025268 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025269 TEST_REQUIRES_X86_AVX;
25270 GemmMicrokernelTester()
25271 .mr(2)
25272 .nr(4)
25273 .kr(8)
25274 .sr(1)
25275 .m(2)
25276 .n(4)
25277 .k(8)
25278 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025279 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025280 }
25281
Marat Dukhan801d2c22021-06-02 21:25:05 -070025282 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025283 TEST_REQUIRES_X86_AVX;
25284 GemmMicrokernelTester()
25285 .mr(2)
25286 .nr(4)
25287 .kr(8)
25288 .sr(1)
25289 .m(2)
25290 .n(4)
25291 .k(8)
25292 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025293 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025294 }
25295#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25296
25297
25298#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070025299 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025300 TEST_REQUIRES_X86_AVX;
25301 GemmMicrokernelTester()
25302 .mr(3)
25303 .nr(4)
25304 .kr(8)
25305 .sr(1)
25306 .m(3)
25307 .n(4)
25308 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080025309 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025310 }
25311
Marat Dukhan801d2c22021-06-02 21:25:05 -070025312 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025313 TEST_REQUIRES_X86_AVX;
25314 GemmMicrokernelTester()
25315 .mr(3)
25316 .nr(4)
25317 .kr(8)
25318 .sr(1)
25319 .m(3)
25320 .n(4)
25321 .k(8)
25322 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025323 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025324 }
25325
Marat Dukhan801d2c22021-06-02 21:25:05 -070025326 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025327 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080025328 for (uint32_t n = 1; n <= 4; n++) {
25329 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025330 GemmMicrokernelTester()
25331 .mr(3)
25332 .nr(4)
25333 .kr(8)
25334 .sr(1)
25335 .m(m)
25336 .n(n)
25337 .k(8)
25338 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025339 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025340 }
25341 }
25342 }
25343
Marat Dukhan801d2c22021-06-02 21:25:05 -070025344 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025345 TEST_REQUIRES_X86_AVX;
25346 for (uint32_t m = 1; m <= 3; m++) {
25347 GemmMicrokernelTester()
25348 .mr(3)
25349 .nr(4)
25350 .kr(8)
25351 .sr(1)
25352 .m(m)
25353 .n(4)
25354 .k(8)
25355 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025356 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025357 }
25358 }
25359
Marat Dukhan801d2c22021-06-02 21:25:05 -070025360 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025361 TEST_REQUIRES_X86_AVX;
25362 for (uint32_t n = 1; n <= 4; n++) {
25363 GemmMicrokernelTester()
25364 .mr(3)
25365 .nr(4)
25366 .kr(8)
25367 .sr(1)
25368 .m(3)
25369 .n(n)
25370 .k(8)
25371 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025372 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025373 }
25374 }
25375
Marat Dukhan801d2c22021-06-02 21:25:05 -070025376 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025377 TEST_REQUIRES_X86_AVX;
25378 for (size_t k = 1; k < 8; k++) {
25379 GemmMicrokernelTester()
25380 .mr(3)
25381 .nr(4)
25382 .kr(8)
25383 .sr(1)
25384 .m(3)
25385 .n(4)
25386 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025387 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025388 }
25389 }
25390
Marat Dukhan801d2c22021-06-02 21:25:05 -070025391 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025392 TEST_REQUIRES_X86_AVX;
25393 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025394 for (uint32_t n = 1; n <= 4; n++) {
25395 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025396 GemmMicrokernelTester()
25397 .mr(3)
25398 .nr(4)
25399 .kr(8)
25400 .sr(1)
25401 .m(m)
25402 .n(n)
25403 .k(k)
25404 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025405 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025406 }
25407 }
25408 }
25409 }
25410
Marat Dukhan801d2c22021-06-02 21:25:05 -070025411 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025412 TEST_REQUIRES_X86_AVX;
25413 for (size_t k = 9; k < 16; k++) {
25414 GemmMicrokernelTester()
25415 .mr(3)
25416 .nr(4)
25417 .kr(8)
25418 .sr(1)
25419 .m(3)
25420 .n(4)
25421 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025422 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025423 }
25424 }
25425
Marat Dukhan801d2c22021-06-02 21:25:05 -070025426 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025427 TEST_REQUIRES_X86_AVX;
25428 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025429 for (uint32_t n = 1; n <= 4; n++) {
25430 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025431 GemmMicrokernelTester()
25432 .mr(3)
25433 .nr(4)
25434 .kr(8)
25435 .sr(1)
25436 .m(m)
25437 .n(n)
25438 .k(k)
25439 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025440 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025441 }
25442 }
25443 }
25444 }
25445
Marat Dukhan801d2c22021-06-02 21:25:05 -070025446 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025447 TEST_REQUIRES_X86_AVX;
25448 for (size_t k = 16; k <= 80; k += 8) {
25449 GemmMicrokernelTester()
25450 .mr(3)
25451 .nr(4)
25452 .kr(8)
25453 .sr(1)
25454 .m(3)
25455 .n(4)
25456 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025457 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025458 }
25459 }
25460
Marat Dukhan801d2c22021-06-02 21:25:05 -070025461 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025462 TEST_REQUIRES_X86_AVX;
25463 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025464 for (uint32_t n = 1; n <= 4; n++) {
25465 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025466 GemmMicrokernelTester()
25467 .mr(3)
25468 .nr(4)
25469 .kr(8)
25470 .sr(1)
25471 .m(m)
25472 .n(n)
25473 .k(k)
25474 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025475 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025476 }
25477 }
25478 }
25479 }
25480
Marat Dukhan801d2c22021-06-02 21:25:05 -070025481 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025482 TEST_REQUIRES_X86_AVX;
25483 for (uint32_t n = 5; n < 8; n++) {
25484 for (size_t k = 1; k <= 40; k += 9) {
25485 GemmMicrokernelTester()
25486 .mr(3)
25487 .nr(4)
25488 .kr(8)
25489 .sr(1)
25490 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025491 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070025492 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025493 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025494 }
25495 }
25496 }
25497
Marat Dukhan801d2c22021-06-02 21:25:05 -070025498 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025499 TEST_REQUIRES_X86_AVX;
25500 for (uint32_t n = 5; n < 8; n++) {
25501 for (size_t k = 1; k <= 40; k += 9) {
25502 GemmMicrokernelTester()
25503 .mr(3)
25504 .nr(4)
25505 .kr(8)
25506 .sr(1)
25507 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025508 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070025509 .k(k)
25510 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025511 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025512 }
25513 }
25514 }
25515
Marat Dukhan801d2c22021-06-02 21:25:05 -070025516 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025517 TEST_REQUIRES_X86_AVX;
25518 for (uint32_t n = 5; n < 8; n++) {
25519 for (size_t k = 1; k <= 40; k += 9) {
25520 for (uint32_t m = 1; m <= 3; m++) {
25521 GemmMicrokernelTester()
25522 .mr(3)
25523 .nr(4)
25524 .kr(8)
25525 .sr(1)
25526 .m(m)
25527 .n(n)
25528 .k(k)
25529 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025530 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025531 }
25532 }
25533 }
25534 }
25535
Marat Dukhan801d2c22021-06-02 21:25:05 -070025536 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025537 TEST_REQUIRES_X86_AVX;
25538 for (uint32_t n = 8; n <= 12; n += 4) {
25539 for (size_t k = 1; k <= 40; k += 9) {
25540 GemmMicrokernelTester()
25541 .mr(3)
25542 .nr(4)
25543 .kr(8)
25544 .sr(1)
25545 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025546 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070025547 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025548 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025549 }
25550 }
25551 }
25552
Marat Dukhan801d2c22021-06-02 21:25:05 -070025553 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025554 TEST_REQUIRES_X86_AVX;
25555 for (uint32_t n = 8; n <= 12; n += 4) {
25556 for (size_t k = 1; k <= 40; k += 9) {
25557 GemmMicrokernelTester()
25558 .mr(3)
25559 .nr(4)
25560 .kr(8)
25561 .sr(1)
25562 .m(3)
25563 .n(n)
25564 .k(k)
25565 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025566 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025567 }
25568 }
25569 }
25570
Marat Dukhan801d2c22021-06-02 21:25:05 -070025571 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025572 TEST_REQUIRES_X86_AVX;
25573 for (uint32_t n = 8; n <= 12; n += 4) {
25574 for (size_t k = 1; k <= 40; k += 9) {
25575 for (uint32_t m = 1; m <= 3; m++) {
25576 GemmMicrokernelTester()
25577 .mr(3)
25578 .nr(4)
25579 .kr(8)
25580 .sr(1)
25581 .m(m)
25582 .n(n)
25583 .k(k)
25584 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025585 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025586 }
25587 }
25588 }
25589 }
25590
Marat Dukhan801d2c22021-06-02 21:25:05 -070025591 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, small_kernel) {
25592 TEST_REQUIRES_X86_AVX;
25593 for (size_t k = 1; k <= 40; k += 9) {
25594 GemmMicrokernelTester()
25595 .mr(3)
25596 .nr(4)
25597 .kr(8)
25598 .sr(1)
25599 .m(3)
25600 .n(4)
25601 .k(k)
25602 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080025603 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070025604 }
25605 }
25606
25607 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, small_kernel_subtile) {
25608 TEST_REQUIRES_X86_AVX;
25609 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025610 for (uint32_t n = 1; n <= 4; n++) {
25611 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070025612 GemmMicrokernelTester()
25613 .mr(3)
25614 .nr(4)
25615 .kr(8)
25616 .sr(1)
25617 .m(m)
25618 .n(n)
25619 .k(k)
25620 .ks(3)
25621 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025622 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070025623 }
25624 }
25625 }
25626 }
25627
25628 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_small_kernel) {
25629 TEST_REQUIRES_X86_AVX;
25630 for (uint32_t n = 5; n < 8; n++) {
25631 for (size_t k = 1; k <= 40; k += 9) {
25632 GemmMicrokernelTester()
25633 .mr(3)
25634 .nr(4)
25635 .kr(8)
25636 .sr(1)
25637 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025638 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070025639 .k(k)
25640 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080025641 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070025642 }
25643 }
25644 }
25645
25646 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_small_kernel) {
25647 TEST_REQUIRES_X86_AVX;
25648 for (uint32_t n = 8; n <= 12; n += 4) {
25649 for (size_t k = 1; k <= 40; k += 9) {
25650 GemmMicrokernelTester()
25651 .mr(3)
25652 .nr(4)
25653 .kr(8)
25654 .sr(1)
25655 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025656 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070025657 .k(k)
25658 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080025659 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070025660 }
25661 }
25662 }
25663
25664 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025665 TEST_REQUIRES_X86_AVX;
25666 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025667 for (uint32_t n = 1; n <= 4; n++) {
25668 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025669 GemmMicrokernelTester()
25670 .mr(3)
25671 .nr(4)
25672 .kr(8)
25673 .sr(1)
25674 .m(m)
25675 .n(n)
25676 .k(k)
25677 .cm_stride(7)
25678 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025679 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025680 }
25681 }
25682 }
25683 }
25684
Marat Dukhan801d2c22021-06-02 21:25:05 -070025685 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, a_offset) {
25686 TEST_REQUIRES_X86_AVX;
25687 for (size_t k = 1; k <= 40; k += 9) {
25688 GemmMicrokernelTester()
25689 .mr(3)
25690 .nr(4)
25691 .kr(8)
25692 .sr(1)
25693 .m(3)
25694 .n(4)
25695 .k(k)
25696 .ks(3)
25697 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080025698 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070025699 }
25700 }
25701
25702 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, zero) {
25703 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080025704 for (size_t k = 1; k <= 40; k += 9) {
25705 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070025706 GemmMicrokernelTester()
25707 .mr(3)
25708 .nr(4)
25709 .kr(8)
25710 .sr(1)
25711 .m(3)
25712 .n(4)
25713 .k(k)
25714 .ks(3)
25715 .a_offset(127)
25716 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080025717 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070025718 }
25719 }
25720 }
25721
25722 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025723 TEST_REQUIRES_X86_AVX;
25724 GemmMicrokernelTester()
25725 .mr(3)
25726 .nr(4)
25727 .kr(8)
25728 .sr(1)
25729 .m(3)
25730 .n(4)
25731 .k(8)
25732 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025733 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025734 }
25735
Marat Dukhan801d2c22021-06-02 21:25:05 -070025736 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025737 TEST_REQUIRES_X86_AVX;
25738 GemmMicrokernelTester()
25739 .mr(3)
25740 .nr(4)
25741 .kr(8)
25742 .sr(1)
25743 .m(3)
25744 .n(4)
25745 .k(8)
25746 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025747 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025748 }
25749
Marat Dukhan801d2c22021-06-02 21:25:05 -070025750 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025751 TEST_REQUIRES_X86_AVX;
25752 GemmMicrokernelTester()
25753 .mr(3)
25754 .nr(4)
25755 .kr(8)
25756 .sr(1)
25757 .m(3)
25758 .n(4)
25759 .k(8)
25760 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025761 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025762 }
25763#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25764
25765
25766#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070025767 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025768 TEST_REQUIRES_X86_XOP;
25769 GemmMicrokernelTester()
25770 .mr(2)
25771 .nr(4)
25772 .kr(8)
25773 .sr(1)
25774 .m(2)
25775 .n(4)
25776 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080025777 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025778 }
25779
Marat Dukhan801d2c22021-06-02 21:25:05 -070025780 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025781 TEST_REQUIRES_X86_XOP;
25782 GemmMicrokernelTester()
25783 .mr(2)
25784 .nr(4)
25785 .kr(8)
25786 .sr(1)
25787 .m(2)
25788 .n(4)
25789 .k(8)
25790 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025791 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025792 }
25793
Marat Dukhan801d2c22021-06-02 21:25:05 -070025794 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025795 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080025796 for (uint32_t n = 1; n <= 4; n++) {
25797 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025798 GemmMicrokernelTester()
25799 .mr(2)
25800 .nr(4)
25801 .kr(8)
25802 .sr(1)
25803 .m(m)
25804 .n(n)
25805 .k(8)
25806 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025807 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025808 }
25809 }
25810 }
25811
Marat Dukhan801d2c22021-06-02 21:25:05 -070025812 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025813 TEST_REQUIRES_X86_XOP;
25814 for (uint32_t m = 1; m <= 2; m++) {
25815 GemmMicrokernelTester()
25816 .mr(2)
25817 .nr(4)
25818 .kr(8)
25819 .sr(1)
25820 .m(m)
25821 .n(4)
25822 .k(8)
25823 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025824 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025825 }
25826 }
25827
Marat Dukhan801d2c22021-06-02 21:25:05 -070025828 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025829 TEST_REQUIRES_X86_XOP;
25830 for (uint32_t n = 1; n <= 4; n++) {
25831 GemmMicrokernelTester()
25832 .mr(2)
25833 .nr(4)
25834 .kr(8)
25835 .sr(1)
25836 .m(2)
25837 .n(n)
25838 .k(8)
25839 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025840 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025841 }
25842 }
25843
Marat Dukhan801d2c22021-06-02 21:25:05 -070025844 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025845 TEST_REQUIRES_X86_XOP;
25846 for (size_t k = 1; k < 8; k++) {
25847 GemmMicrokernelTester()
25848 .mr(2)
25849 .nr(4)
25850 .kr(8)
25851 .sr(1)
25852 .m(2)
25853 .n(4)
25854 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025855 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025856 }
25857 }
25858
Marat Dukhan801d2c22021-06-02 21:25:05 -070025859 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025860 TEST_REQUIRES_X86_XOP;
25861 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025862 for (uint32_t n = 1; n <= 4; n++) {
25863 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025864 GemmMicrokernelTester()
25865 .mr(2)
25866 .nr(4)
25867 .kr(8)
25868 .sr(1)
25869 .m(m)
25870 .n(n)
25871 .k(k)
25872 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025873 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025874 }
25875 }
25876 }
25877 }
25878
Marat Dukhan801d2c22021-06-02 21:25:05 -070025879 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025880 TEST_REQUIRES_X86_XOP;
25881 for (size_t k = 9; k < 16; k++) {
25882 GemmMicrokernelTester()
25883 .mr(2)
25884 .nr(4)
25885 .kr(8)
25886 .sr(1)
25887 .m(2)
25888 .n(4)
25889 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025890 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025891 }
25892 }
25893
Marat Dukhan801d2c22021-06-02 21:25:05 -070025894 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025895 TEST_REQUIRES_X86_XOP;
25896 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025897 for (uint32_t n = 1; n <= 4; n++) {
25898 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025899 GemmMicrokernelTester()
25900 .mr(2)
25901 .nr(4)
25902 .kr(8)
25903 .sr(1)
25904 .m(m)
25905 .n(n)
25906 .k(k)
25907 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025908 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025909 }
25910 }
25911 }
25912 }
25913
Marat Dukhan801d2c22021-06-02 21:25:05 -070025914 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025915 TEST_REQUIRES_X86_XOP;
25916 for (size_t k = 16; k <= 80; k += 8) {
25917 GemmMicrokernelTester()
25918 .mr(2)
25919 .nr(4)
25920 .kr(8)
25921 .sr(1)
25922 .m(2)
25923 .n(4)
25924 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025925 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025926 }
25927 }
25928
Marat Dukhan801d2c22021-06-02 21:25:05 -070025929 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025930 TEST_REQUIRES_X86_XOP;
25931 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025932 for (uint32_t n = 1; n <= 4; n++) {
25933 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025934 GemmMicrokernelTester()
25935 .mr(2)
25936 .nr(4)
25937 .kr(8)
25938 .sr(1)
25939 .m(m)
25940 .n(n)
25941 .k(k)
25942 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025943 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025944 }
25945 }
25946 }
25947 }
25948
Marat Dukhan801d2c22021-06-02 21:25:05 -070025949 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025950 TEST_REQUIRES_X86_XOP;
25951 for (uint32_t n = 5; n < 8; n++) {
25952 for (size_t k = 1; k <= 40; k += 9) {
25953 GemmMicrokernelTester()
25954 .mr(2)
25955 .nr(4)
25956 .kr(8)
25957 .sr(1)
25958 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025959 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070025960 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025961 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025962 }
25963 }
25964 }
25965
Marat Dukhan801d2c22021-06-02 21:25:05 -070025966 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025967 TEST_REQUIRES_X86_XOP;
25968 for (uint32_t n = 5; n < 8; n++) {
25969 for (size_t k = 1; k <= 40; k += 9) {
25970 GemmMicrokernelTester()
25971 .mr(2)
25972 .nr(4)
25973 .kr(8)
25974 .sr(1)
25975 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025976 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070025977 .k(k)
25978 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025979 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025980 }
25981 }
25982 }
25983
Marat Dukhan801d2c22021-06-02 21:25:05 -070025984 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025985 TEST_REQUIRES_X86_XOP;
25986 for (uint32_t n = 5; n < 8; n++) {
25987 for (size_t k = 1; k <= 40; k += 9) {
25988 for (uint32_t m = 1; m <= 2; m++) {
25989 GemmMicrokernelTester()
25990 .mr(2)
25991 .nr(4)
25992 .kr(8)
25993 .sr(1)
25994 .m(m)
25995 .n(n)
25996 .k(k)
25997 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025998 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025999 }
26000 }
26001 }
26002 }
26003
Marat Dukhan801d2c22021-06-02 21:25:05 -070026004 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026005 TEST_REQUIRES_X86_XOP;
26006 for (uint32_t n = 8; n <= 12; n += 4) {
26007 for (size_t k = 1; k <= 40; k += 9) {
26008 GemmMicrokernelTester()
26009 .mr(2)
26010 .nr(4)
26011 .kr(8)
26012 .sr(1)
26013 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026014 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070026015 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026016 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026017 }
26018 }
26019 }
26020
Marat Dukhan801d2c22021-06-02 21:25:05 -070026021 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026022 TEST_REQUIRES_X86_XOP;
26023 for (uint32_t n = 8; n <= 12; n += 4) {
26024 for (size_t k = 1; k <= 40; k += 9) {
26025 GemmMicrokernelTester()
26026 .mr(2)
26027 .nr(4)
26028 .kr(8)
26029 .sr(1)
26030 .m(2)
26031 .n(n)
26032 .k(k)
26033 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026034 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026035 }
26036 }
26037 }
26038
Marat Dukhan801d2c22021-06-02 21:25:05 -070026039 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026040 TEST_REQUIRES_X86_XOP;
26041 for (uint32_t n = 8; n <= 12; n += 4) {
26042 for (size_t k = 1; k <= 40; k += 9) {
26043 for (uint32_t m = 1; m <= 2; m++) {
26044 GemmMicrokernelTester()
26045 .mr(2)
26046 .nr(4)
26047 .kr(8)
26048 .sr(1)
26049 .m(m)
26050 .n(n)
26051 .k(k)
26052 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026053 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026054 }
26055 }
26056 }
26057 }
26058
Marat Dukhan801d2c22021-06-02 21:25:05 -070026059 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, small_kernel) {
26060 TEST_REQUIRES_X86_XOP;
26061 for (size_t k = 1; k <= 40; k += 9) {
26062 GemmMicrokernelTester()
26063 .mr(2)
26064 .nr(4)
26065 .kr(8)
26066 .sr(1)
26067 .m(2)
26068 .n(4)
26069 .k(k)
26070 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080026071 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070026072 }
26073 }
26074
26075 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, small_kernel_subtile) {
26076 TEST_REQUIRES_X86_XOP;
26077 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026078 for (uint32_t n = 1; n <= 4; n++) {
26079 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070026080 GemmMicrokernelTester()
26081 .mr(2)
26082 .nr(4)
26083 .kr(8)
26084 .sr(1)
26085 .m(m)
26086 .n(n)
26087 .k(k)
26088 .ks(3)
26089 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026090 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070026091 }
26092 }
26093 }
26094 }
26095
26096 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_small_kernel) {
26097 TEST_REQUIRES_X86_XOP;
26098 for (uint32_t n = 5; n < 8; n++) {
26099 for (size_t k = 1; k <= 40; k += 9) {
26100 GemmMicrokernelTester()
26101 .mr(2)
26102 .nr(4)
26103 .kr(8)
26104 .sr(1)
26105 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026106 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070026107 .k(k)
26108 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080026109 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070026110 }
26111 }
26112 }
26113
26114 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_small_kernel) {
26115 TEST_REQUIRES_X86_XOP;
26116 for (uint32_t n = 8; n <= 12; n += 4) {
26117 for (size_t k = 1; k <= 40; k += 9) {
26118 GemmMicrokernelTester()
26119 .mr(2)
26120 .nr(4)
26121 .kr(8)
26122 .sr(1)
26123 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026124 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070026125 .k(k)
26126 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080026127 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070026128 }
26129 }
26130 }
26131
26132 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026133 TEST_REQUIRES_X86_XOP;
26134 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026135 for (uint32_t n = 1; n <= 4; n++) {
26136 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026137 GemmMicrokernelTester()
26138 .mr(2)
26139 .nr(4)
26140 .kr(8)
26141 .sr(1)
26142 .m(m)
26143 .n(n)
26144 .k(k)
26145 .cm_stride(7)
26146 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026147 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026148 }
26149 }
26150 }
26151 }
26152
Marat Dukhan801d2c22021-06-02 21:25:05 -070026153 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, a_offset) {
26154 TEST_REQUIRES_X86_XOP;
26155 for (size_t k = 1; k <= 40; k += 9) {
26156 GemmMicrokernelTester()
26157 .mr(2)
26158 .nr(4)
26159 .kr(8)
26160 .sr(1)
26161 .m(2)
26162 .n(4)
26163 .k(k)
26164 .ks(3)
26165 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080026166 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070026167 }
26168 }
26169
26170 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, zero) {
26171 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080026172 for (size_t k = 1; k <= 40; k += 9) {
26173 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070026174 GemmMicrokernelTester()
26175 .mr(2)
26176 .nr(4)
26177 .kr(8)
26178 .sr(1)
26179 .m(2)
26180 .n(4)
26181 .k(k)
26182 .ks(3)
26183 .a_offset(83)
26184 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080026185 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070026186 }
26187 }
26188 }
26189
26190 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026191 TEST_REQUIRES_X86_XOP;
26192 GemmMicrokernelTester()
26193 .mr(2)
26194 .nr(4)
26195 .kr(8)
26196 .sr(1)
26197 .m(2)
26198 .n(4)
26199 .k(8)
26200 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026201 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026202 }
26203
Marat Dukhan801d2c22021-06-02 21:25:05 -070026204 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026205 TEST_REQUIRES_X86_XOP;
26206 GemmMicrokernelTester()
26207 .mr(2)
26208 .nr(4)
26209 .kr(8)
26210 .sr(1)
26211 .m(2)
26212 .n(4)
26213 .k(8)
26214 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026215 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026216 }
26217
Marat Dukhan801d2c22021-06-02 21:25:05 -070026218 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026219 TEST_REQUIRES_X86_XOP;
26220 GemmMicrokernelTester()
26221 .mr(2)
26222 .nr(4)
26223 .kr(8)
26224 .sr(1)
26225 .m(2)
26226 .n(4)
26227 .k(8)
26228 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026229 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026230 }
26231#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26232
26233
26234#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070026235 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026236 TEST_REQUIRES_X86_XOP;
26237 GemmMicrokernelTester()
26238 .mr(3)
26239 .nr(4)
26240 .kr(8)
26241 .sr(1)
26242 .m(3)
26243 .n(4)
26244 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080026245 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026246 }
26247
Marat Dukhan801d2c22021-06-02 21:25:05 -070026248 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026249 TEST_REQUIRES_X86_XOP;
26250 GemmMicrokernelTester()
26251 .mr(3)
26252 .nr(4)
26253 .kr(8)
26254 .sr(1)
26255 .m(3)
26256 .n(4)
26257 .k(8)
26258 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026259 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026260 }
26261
Marat Dukhan801d2c22021-06-02 21:25:05 -070026262 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026263 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080026264 for (uint32_t n = 1; n <= 4; n++) {
26265 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026266 GemmMicrokernelTester()
26267 .mr(3)
26268 .nr(4)
26269 .kr(8)
26270 .sr(1)
26271 .m(m)
26272 .n(n)
26273 .k(8)
26274 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026275 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026276 }
26277 }
26278 }
26279
Marat Dukhan801d2c22021-06-02 21:25:05 -070026280 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026281 TEST_REQUIRES_X86_XOP;
26282 for (uint32_t m = 1; m <= 3; m++) {
26283 GemmMicrokernelTester()
26284 .mr(3)
26285 .nr(4)
26286 .kr(8)
26287 .sr(1)
26288 .m(m)
26289 .n(4)
26290 .k(8)
26291 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026292 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026293 }
26294 }
26295
Marat Dukhan801d2c22021-06-02 21:25:05 -070026296 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026297 TEST_REQUIRES_X86_XOP;
26298 for (uint32_t n = 1; n <= 4; n++) {
26299 GemmMicrokernelTester()
26300 .mr(3)
26301 .nr(4)
26302 .kr(8)
26303 .sr(1)
26304 .m(3)
26305 .n(n)
26306 .k(8)
26307 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026308 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026309 }
26310 }
26311
Marat Dukhan801d2c22021-06-02 21:25:05 -070026312 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026313 TEST_REQUIRES_X86_XOP;
26314 for (size_t k = 1; k < 8; k++) {
26315 GemmMicrokernelTester()
26316 .mr(3)
26317 .nr(4)
26318 .kr(8)
26319 .sr(1)
26320 .m(3)
26321 .n(4)
26322 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026323 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026324 }
26325 }
26326
Marat Dukhan801d2c22021-06-02 21:25:05 -070026327 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026328 TEST_REQUIRES_X86_XOP;
26329 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026330 for (uint32_t n = 1; n <= 4; n++) {
26331 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026332 GemmMicrokernelTester()
26333 .mr(3)
26334 .nr(4)
26335 .kr(8)
26336 .sr(1)
26337 .m(m)
26338 .n(n)
26339 .k(k)
26340 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026341 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026342 }
26343 }
26344 }
26345 }
26346
Marat Dukhan801d2c22021-06-02 21:25:05 -070026347 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026348 TEST_REQUIRES_X86_XOP;
26349 for (size_t k = 9; k < 16; k++) {
26350 GemmMicrokernelTester()
26351 .mr(3)
26352 .nr(4)
26353 .kr(8)
26354 .sr(1)
26355 .m(3)
26356 .n(4)
26357 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026358 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026359 }
26360 }
26361
Marat Dukhan801d2c22021-06-02 21:25:05 -070026362 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026363 TEST_REQUIRES_X86_XOP;
26364 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026365 for (uint32_t n = 1; n <= 4; n++) {
26366 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026367 GemmMicrokernelTester()
26368 .mr(3)
26369 .nr(4)
26370 .kr(8)
26371 .sr(1)
26372 .m(m)
26373 .n(n)
26374 .k(k)
26375 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026376 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026377 }
26378 }
26379 }
26380 }
26381
Marat Dukhan801d2c22021-06-02 21:25:05 -070026382 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026383 TEST_REQUIRES_X86_XOP;
26384 for (size_t k = 16; k <= 80; k += 8) {
26385 GemmMicrokernelTester()
26386 .mr(3)
26387 .nr(4)
26388 .kr(8)
26389 .sr(1)
26390 .m(3)
26391 .n(4)
26392 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026393 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026394 }
26395 }
26396
Marat Dukhan801d2c22021-06-02 21:25:05 -070026397 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026398 TEST_REQUIRES_X86_XOP;
26399 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026400 for (uint32_t n = 1; n <= 4; n++) {
26401 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026402 GemmMicrokernelTester()
26403 .mr(3)
26404 .nr(4)
26405 .kr(8)
26406 .sr(1)
26407 .m(m)
26408 .n(n)
26409 .k(k)
26410 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026411 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026412 }
26413 }
26414 }
26415 }
26416
Marat Dukhan801d2c22021-06-02 21:25:05 -070026417 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026418 TEST_REQUIRES_X86_XOP;
26419 for (uint32_t n = 5; n < 8; n++) {
26420 for (size_t k = 1; k <= 40; k += 9) {
26421 GemmMicrokernelTester()
26422 .mr(3)
26423 .nr(4)
26424 .kr(8)
26425 .sr(1)
26426 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026427 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070026428 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026429 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026430 }
26431 }
26432 }
26433
Marat Dukhan801d2c22021-06-02 21:25:05 -070026434 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026435 TEST_REQUIRES_X86_XOP;
26436 for (uint32_t n = 5; n < 8; n++) {
26437 for (size_t k = 1; k <= 40; k += 9) {
26438 GemmMicrokernelTester()
26439 .mr(3)
26440 .nr(4)
26441 .kr(8)
26442 .sr(1)
26443 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026444 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070026445 .k(k)
26446 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026447 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026448 }
26449 }
26450 }
26451
Marat Dukhan801d2c22021-06-02 21:25:05 -070026452 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026453 TEST_REQUIRES_X86_XOP;
26454 for (uint32_t n = 5; n < 8; n++) {
26455 for (size_t k = 1; k <= 40; k += 9) {
26456 for (uint32_t m = 1; m <= 3; m++) {
26457 GemmMicrokernelTester()
26458 .mr(3)
26459 .nr(4)
26460 .kr(8)
26461 .sr(1)
26462 .m(m)
26463 .n(n)
26464 .k(k)
26465 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026466 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026467 }
26468 }
26469 }
26470 }
26471
Marat Dukhan801d2c22021-06-02 21:25:05 -070026472 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026473 TEST_REQUIRES_X86_XOP;
26474 for (uint32_t n = 8; n <= 12; n += 4) {
26475 for (size_t k = 1; k <= 40; k += 9) {
26476 GemmMicrokernelTester()
26477 .mr(3)
26478 .nr(4)
26479 .kr(8)
26480 .sr(1)
26481 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026482 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070026483 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026484 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026485 }
26486 }
26487 }
26488
Marat Dukhan801d2c22021-06-02 21:25:05 -070026489 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026490 TEST_REQUIRES_X86_XOP;
26491 for (uint32_t n = 8; n <= 12; n += 4) {
26492 for (size_t k = 1; k <= 40; k += 9) {
26493 GemmMicrokernelTester()
26494 .mr(3)
26495 .nr(4)
26496 .kr(8)
26497 .sr(1)
26498 .m(3)
26499 .n(n)
26500 .k(k)
26501 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026502 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026503 }
26504 }
26505 }
26506
Marat Dukhan801d2c22021-06-02 21:25:05 -070026507 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026508 TEST_REQUIRES_X86_XOP;
26509 for (uint32_t n = 8; n <= 12; n += 4) {
26510 for (size_t k = 1; k <= 40; k += 9) {
26511 for (uint32_t m = 1; m <= 3; m++) {
26512 GemmMicrokernelTester()
26513 .mr(3)
26514 .nr(4)
26515 .kr(8)
26516 .sr(1)
26517 .m(m)
26518 .n(n)
26519 .k(k)
26520 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026521 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026522 }
26523 }
26524 }
26525 }
26526
Marat Dukhan801d2c22021-06-02 21:25:05 -070026527 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, small_kernel) {
26528 TEST_REQUIRES_X86_XOP;
26529 for (size_t k = 1; k <= 40; k += 9) {
26530 GemmMicrokernelTester()
26531 .mr(3)
26532 .nr(4)
26533 .kr(8)
26534 .sr(1)
26535 .m(3)
26536 .n(4)
26537 .k(k)
26538 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080026539 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070026540 }
26541 }
26542
26543 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, small_kernel_subtile) {
26544 TEST_REQUIRES_X86_XOP;
26545 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026546 for (uint32_t n = 1; n <= 4; n++) {
26547 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070026548 GemmMicrokernelTester()
26549 .mr(3)
26550 .nr(4)
26551 .kr(8)
26552 .sr(1)
26553 .m(m)
26554 .n(n)
26555 .k(k)
26556 .ks(3)
26557 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026558 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070026559 }
26560 }
26561 }
26562 }
26563
26564 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_small_kernel) {
26565 TEST_REQUIRES_X86_XOP;
26566 for (uint32_t n = 5; n < 8; n++) {
26567 for (size_t k = 1; k <= 40; k += 9) {
26568 GemmMicrokernelTester()
26569 .mr(3)
26570 .nr(4)
26571 .kr(8)
26572 .sr(1)
26573 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026574 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070026575 .k(k)
26576 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080026577 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070026578 }
26579 }
26580 }
26581
26582 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_small_kernel) {
26583 TEST_REQUIRES_X86_XOP;
26584 for (uint32_t n = 8; n <= 12; n += 4) {
26585 for (size_t k = 1; k <= 40; k += 9) {
26586 GemmMicrokernelTester()
26587 .mr(3)
26588 .nr(4)
26589 .kr(8)
26590 .sr(1)
26591 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026592 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070026593 .k(k)
26594 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080026595 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070026596 }
26597 }
26598 }
26599
26600 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026601 TEST_REQUIRES_X86_XOP;
26602 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026603 for (uint32_t n = 1; n <= 4; n++) {
26604 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026605 GemmMicrokernelTester()
26606 .mr(3)
26607 .nr(4)
26608 .kr(8)
26609 .sr(1)
26610 .m(m)
26611 .n(n)
26612 .k(k)
26613 .cm_stride(7)
26614 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026615 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026616 }
26617 }
26618 }
26619 }
26620
Marat Dukhan801d2c22021-06-02 21:25:05 -070026621 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, a_offset) {
26622 TEST_REQUIRES_X86_XOP;
26623 for (size_t k = 1; k <= 40; k += 9) {
26624 GemmMicrokernelTester()
26625 .mr(3)
26626 .nr(4)
26627 .kr(8)
26628 .sr(1)
26629 .m(3)
26630 .n(4)
26631 .k(k)
26632 .ks(3)
26633 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080026634 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070026635 }
26636 }
26637
26638 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, zero) {
26639 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080026640 for (size_t k = 1; k <= 40; k += 9) {
26641 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070026642 GemmMicrokernelTester()
26643 .mr(3)
26644 .nr(4)
26645 .kr(8)
26646 .sr(1)
26647 .m(3)
26648 .n(4)
26649 .k(k)
26650 .ks(3)
26651 .a_offset(127)
26652 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080026653 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070026654 }
26655 }
26656 }
26657
26658 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026659 TEST_REQUIRES_X86_XOP;
26660 GemmMicrokernelTester()
26661 .mr(3)
26662 .nr(4)
26663 .kr(8)
26664 .sr(1)
26665 .m(3)
26666 .n(4)
26667 .k(8)
26668 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026669 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026670 }
26671
Marat Dukhan801d2c22021-06-02 21:25:05 -070026672 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026673 TEST_REQUIRES_X86_XOP;
26674 GemmMicrokernelTester()
26675 .mr(3)
26676 .nr(4)
26677 .kr(8)
26678 .sr(1)
26679 .m(3)
26680 .n(4)
26681 .k(8)
26682 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026683 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026684 }
26685
Marat Dukhan801d2c22021-06-02 21:25:05 -070026686 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026687 TEST_REQUIRES_X86_XOP;
26688 GemmMicrokernelTester()
26689 .mr(3)
26690 .nr(4)
26691 .kr(8)
26692 .sr(1)
26693 .m(3)
26694 .n(4)
26695 .k(8)
26696 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026697 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026698 }
26699#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26700
26701
26702#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070026703 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026704 TEST_REQUIRES_X86_SSE2;
26705 GemmMicrokernelTester()
26706 .mr(1)
26707 .nr(4)
26708 .kr(8)
26709 .sr(1)
26710 .m(1)
26711 .n(4)
26712 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080026713 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026714 }
26715
Marat Dukhan801d2c22021-06-02 21:25:05 -070026716 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026717 TEST_REQUIRES_X86_SSE2;
26718 GemmMicrokernelTester()
26719 .mr(1)
26720 .nr(4)
26721 .kr(8)
26722 .sr(1)
26723 .m(1)
26724 .n(4)
26725 .k(8)
26726 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026727 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026728 }
26729
Marat Dukhan801d2c22021-06-02 21:25:05 -070026730 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026731 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080026732 for (uint32_t n = 1; n <= 4; n++) {
26733 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026734 GemmMicrokernelTester()
26735 .mr(1)
26736 .nr(4)
26737 .kr(8)
26738 .sr(1)
26739 .m(m)
26740 .n(n)
26741 .k(8)
26742 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026743 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026744 }
26745 }
26746 }
26747
Marat Dukhan801d2c22021-06-02 21:25:05 -070026748 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026749 TEST_REQUIRES_X86_SSE2;
26750 for (uint32_t m = 1; m <= 1; m++) {
26751 GemmMicrokernelTester()
26752 .mr(1)
26753 .nr(4)
26754 .kr(8)
26755 .sr(1)
26756 .m(m)
26757 .n(4)
26758 .k(8)
26759 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026760 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026761 }
26762 }
26763
Marat Dukhan801d2c22021-06-02 21:25:05 -070026764 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026765 TEST_REQUIRES_X86_SSE2;
26766 for (uint32_t n = 1; n <= 4; n++) {
26767 GemmMicrokernelTester()
26768 .mr(1)
26769 .nr(4)
26770 .kr(8)
26771 .sr(1)
26772 .m(1)
26773 .n(n)
26774 .k(8)
26775 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026776 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026777 }
26778 }
26779
Marat Dukhan801d2c22021-06-02 21:25:05 -070026780 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026781 TEST_REQUIRES_X86_SSE2;
26782 for (size_t k = 1; k < 8; k++) {
26783 GemmMicrokernelTester()
26784 .mr(1)
26785 .nr(4)
26786 .kr(8)
26787 .sr(1)
26788 .m(1)
26789 .n(4)
26790 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026791 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026792 }
26793 }
26794
Marat Dukhan801d2c22021-06-02 21:25:05 -070026795 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026796 TEST_REQUIRES_X86_SSE2;
26797 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026798 for (uint32_t n = 1; n <= 4; n++) {
26799 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026800 GemmMicrokernelTester()
26801 .mr(1)
26802 .nr(4)
26803 .kr(8)
26804 .sr(1)
26805 .m(m)
26806 .n(n)
26807 .k(k)
26808 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026809 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026810 }
26811 }
26812 }
26813 }
26814
Marat Dukhan801d2c22021-06-02 21:25:05 -070026815 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026816 TEST_REQUIRES_X86_SSE2;
26817 for (size_t k = 9; k < 16; k++) {
26818 GemmMicrokernelTester()
26819 .mr(1)
26820 .nr(4)
26821 .kr(8)
26822 .sr(1)
26823 .m(1)
26824 .n(4)
26825 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026826 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026827 }
26828 }
26829
Marat Dukhan801d2c22021-06-02 21:25:05 -070026830 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026831 TEST_REQUIRES_X86_SSE2;
26832 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026833 for (uint32_t n = 1; n <= 4; n++) {
26834 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026835 GemmMicrokernelTester()
26836 .mr(1)
26837 .nr(4)
26838 .kr(8)
26839 .sr(1)
26840 .m(m)
26841 .n(n)
26842 .k(k)
26843 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026844 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026845 }
26846 }
26847 }
26848 }
26849
Marat Dukhan801d2c22021-06-02 21:25:05 -070026850 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026851 TEST_REQUIRES_X86_SSE2;
26852 for (size_t k = 16; k <= 80; k += 8) {
26853 GemmMicrokernelTester()
26854 .mr(1)
26855 .nr(4)
26856 .kr(8)
26857 .sr(1)
26858 .m(1)
26859 .n(4)
26860 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026861 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026862 }
26863 }
26864
Marat Dukhan801d2c22021-06-02 21:25:05 -070026865 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026866 TEST_REQUIRES_X86_SSE2;
26867 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026868 for (uint32_t n = 1; n <= 4; n++) {
26869 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026870 GemmMicrokernelTester()
26871 .mr(1)
26872 .nr(4)
26873 .kr(8)
26874 .sr(1)
26875 .m(m)
26876 .n(n)
26877 .k(k)
26878 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026879 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026880 }
26881 }
26882 }
26883 }
26884
Marat Dukhan801d2c22021-06-02 21:25:05 -070026885 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026886 TEST_REQUIRES_X86_SSE2;
26887 for (uint32_t n = 5; n < 8; n++) {
26888 for (size_t k = 1; k <= 40; k += 9) {
26889 GemmMicrokernelTester()
26890 .mr(1)
26891 .nr(4)
26892 .kr(8)
26893 .sr(1)
26894 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026895 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070026896 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026897 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026898 }
26899 }
26900 }
26901
Marat Dukhan801d2c22021-06-02 21:25:05 -070026902 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026903 TEST_REQUIRES_X86_SSE2;
26904 for (uint32_t n = 5; n < 8; n++) {
26905 for (size_t k = 1; k <= 40; k += 9) {
26906 GemmMicrokernelTester()
26907 .mr(1)
26908 .nr(4)
26909 .kr(8)
26910 .sr(1)
26911 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026912 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070026913 .k(k)
26914 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026915 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026916 }
26917 }
26918 }
26919
Marat Dukhan801d2c22021-06-02 21:25:05 -070026920 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026921 TEST_REQUIRES_X86_SSE2;
26922 for (uint32_t n = 5; n < 8; n++) {
26923 for (size_t k = 1; k <= 40; k += 9) {
26924 for (uint32_t m = 1; m <= 1; m++) {
26925 GemmMicrokernelTester()
26926 .mr(1)
26927 .nr(4)
26928 .kr(8)
26929 .sr(1)
26930 .m(m)
26931 .n(n)
26932 .k(k)
26933 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026934 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026935 }
26936 }
26937 }
26938 }
26939
Marat Dukhan801d2c22021-06-02 21:25:05 -070026940 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026941 TEST_REQUIRES_X86_SSE2;
26942 for (uint32_t n = 8; n <= 12; n += 4) {
26943 for (size_t k = 1; k <= 40; k += 9) {
26944 GemmMicrokernelTester()
26945 .mr(1)
26946 .nr(4)
26947 .kr(8)
26948 .sr(1)
26949 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026950 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070026951 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026952 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026953 }
26954 }
26955 }
26956
Marat Dukhan801d2c22021-06-02 21:25:05 -070026957 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026958 TEST_REQUIRES_X86_SSE2;
26959 for (uint32_t n = 8; n <= 12; n += 4) {
26960 for (size_t k = 1; k <= 40; k += 9) {
26961 GemmMicrokernelTester()
26962 .mr(1)
26963 .nr(4)
26964 .kr(8)
26965 .sr(1)
26966 .m(1)
26967 .n(n)
26968 .k(k)
26969 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026970 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026971 }
26972 }
26973 }
26974
Marat Dukhan801d2c22021-06-02 21:25:05 -070026975 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026976 TEST_REQUIRES_X86_SSE2;
26977 for (uint32_t n = 8; n <= 12; n += 4) {
26978 for (size_t k = 1; k <= 40; k += 9) {
26979 for (uint32_t m = 1; m <= 1; m++) {
26980 GemmMicrokernelTester()
26981 .mr(1)
26982 .nr(4)
26983 .kr(8)
26984 .sr(1)
26985 .m(m)
26986 .n(n)
26987 .k(k)
26988 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026989 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026990 }
26991 }
26992 }
26993 }
26994
Marat Dukhan801d2c22021-06-02 21:25:05 -070026995 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, small_kernel) {
26996 TEST_REQUIRES_X86_SSE2;
26997 for (size_t k = 1; k <= 40; k += 9) {
26998 GemmMicrokernelTester()
26999 .mr(1)
27000 .nr(4)
27001 .kr(8)
27002 .sr(1)
27003 .m(1)
27004 .n(4)
27005 .k(k)
27006 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080027007 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070027008 }
27009 }
27010
27011 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, small_kernel_subtile) {
27012 TEST_REQUIRES_X86_SSE2;
27013 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027014 for (uint32_t n = 1; n <= 4; n++) {
27015 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070027016 GemmMicrokernelTester()
27017 .mr(1)
27018 .nr(4)
27019 .kr(8)
27020 .sr(1)
27021 .m(m)
27022 .n(n)
27023 .k(k)
27024 .ks(3)
27025 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027026 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070027027 }
27028 }
27029 }
27030 }
27031
27032 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_small_kernel) {
27033 TEST_REQUIRES_X86_SSE2;
27034 for (uint32_t n = 5; n < 8; n++) {
27035 for (size_t k = 1; k <= 40; k += 9) {
27036 GemmMicrokernelTester()
27037 .mr(1)
27038 .nr(4)
27039 .kr(8)
27040 .sr(1)
27041 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027042 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070027043 .k(k)
27044 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080027045 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070027046 }
27047 }
27048 }
27049
27050 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_small_kernel) {
27051 TEST_REQUIRES_X86_SSE2;
27052 for (uint32_t n = 8; n <= 12; n += 4) {
27053 for (size_t k = 1; k <= 40; k += 9) {
27054 GemmMicrokernelTester()
27055 .mr(1)
27056 .nr(4)
27057 .kr(8)
27058 .sr(1)
27059 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027060 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070027061 .k(k)
27062 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080027063 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070027064 }
27065 }
27066 }
27067
27068 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027069 TEST_REQUIRES_X86_SSE2;
27070 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027071 for (uint32_t n = 1; n <= 4; n++) {
27072 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027073 GemmMicrokernelTester()
27074 .mr(1)
27075 .nr(4)
27076 .kr(8)
27077 .sr(1)
27078 .m(m)
27079 .n(n)
27080 .k(k)
27081 .cm_stride(7)
27082 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027083 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027084 }
27085 }
27086 }
27087 }
27088
Marat Dukhan801d2c22021-06-02 21:25:05 -070027089 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, a_offset) {
27090 TEST_REQUIRES_X86_SSE2;
27091 for (size_t k = 1; k <= 40; k += 9) {
27092 GemmMicrokernelTester()
27093 .mr(1)
27094 .nr(4)
27095 .kr(8)
27096 .sr(1)
27097 .m(1)
27098 .n(4)
27099 .k(k)
27100 .ks(3)
27101 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080027102 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070027103 }
27104 }
27105
27106 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, zero) {
27107 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080027108 for (size_t k = 1; k <= 40; k += 9) {
27109 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070027110 GemmMicrokernelTester()
27111 .mr(1)
27112 .nr(4)
27113 .kr(8)
27114 .sr(1)
27115 .m(1)
27116 .n(4)
27117 .k(k)
27118 .ks(3)
27119 .a_offset(43)
27120 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080027121 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070027122 }
27123 }
27124 }
27125
27126 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027127 TEST_REQUIRES_X86_SSE2;
27128 GemmMicrokernelTester()
27129 .mr(1)
27130 .nr(4)
27131 .kr(8)
27132 .sr(1)
27133 .m(1)
27134 .n(4)
27135 .k(8)
27136 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027137 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027138 }
27139
Marat Dukhan801d2c22021-06-02 21:25:05 -070027140 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027141 TEST_REQUIRES_X86_SSE2;
27142 GemmMicrokernelTester()
27143 .mr(1)
27144 .nr(4)
27145 .kr(8)
27146 .sr(1)
27147 .m(1)
27148 .n(4)
27149 .k(8)
27150 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027151 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027152 }
27153
Marat Dukhan801d2c22021-06-02 21:25:05 -070027154 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027155 TEST_REQUIRES_X86_SSE2;
27156 GemmMicrokernelTester()
27157 .mr(1)
27158 .nr(4)
27159 .kr(8)
27160 .sr(1)
27161 .m(1)
27162 .n(4)
27163 .k(8)
27164 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027165 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027166 }
27167#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
27168
27169
27170#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070027171 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027172 TEST_REQUIRES_X86_SSE2;
27173 GemmMicrokernelTester()
27174 .mr(2)
27175 .nr(4)
27176 .kr(8)
27177 .sr(1)
27178 .m(2)
27179 .n(4)
27180 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080027181 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027182 }
27183
Marat Dukhan801d2c22021-06-02 21:25:05 -070027184 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027185 TEST_REQUIRES_X86_SSE2;
27186 GemmMicrokernelTester()
27187 .mr(2)
27188 .nr(4)
27189 .kr(8)
27190 .sr(1)
27191 .m(2)
27192 .n(4)
27193 .k(8)
27194 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027195 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027196 }
27197
Marat Dukhan801d2c22021-06-02 21:25:05 -070027198 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027199 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080027200 for (uint32_t n = 1; n <= 4; n++) {
27201 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027202 GemmMicrokernelTester()
27203 .mr(2)
27204 .nr(4)
27205 .kr(8)
27206 .sr(1)
27207 .m(m)
27208 .n(n)
27209 .k(8)
27210 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027211 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027212 }
27213 }
27214 }
27215
Marat Dukhan801d2c22021-06-02 21:25:05 -070027216 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027217 TEST_REQUIRES_X86_SSE2;
27218 for (uint32_t m = 1; m <= 2; m++) {
27219 GemmMicrokernelTester()
27220 .mr(2)
27221 .nr(4)
27222 .kr(8)
27223 .sr(1)
27224 .m(m)
27225 .n(4)
27226 .k(8)
27227 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027228 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027229 }
27230 }
27231
Marat Dukhan801d2c22021-06-02 21:25:05 -070027232 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027233 TEST_REQUIRES_X86_SSE2;
27234 for (uint32_t n = 1; n <= 4; n++) {
27235 GemmMicrokernelTester()
27236 .mr(2)
27237 .nr(4)
27238 .kr(8)
27239 .sr(1)
27240 .m(2)
27241 .n(n)
27242 .k(8)
27243 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027244 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027245 }
27246 }
27247
Marat Dukhan801d2c22021-06-02 21:25:05 -070027248 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027249 TEST_REQUIRES_X86_SSE2;
27250 for (size_t k = 1; k < 8; k++) {
27251 GemmMicrokernelTester()
27252 .mr(2)
27253 .nr(4)
27254 .kr(8)
27255 .sr(1)
27256 .m(2)
27257 .n(4)
27258 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027259 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027260 }
27261 }
27262
Marat Dukhan801d2c22021-06-02 21:25:05 -070027263 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027264 TEST_REQUIRES_X86_SSE2;
27265 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027266 for (uint32_t n = 1; n <= 4; n++) {
27267 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027268 GemmMicrokernelTester()
27269 .mr(2)
27270 .nr(4)
27271 .kr(8)
27272 .sr(1)
27273 .m(m)
27274 .n(n)
27275 .k(k)
27276 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027277 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027278 }
27279 }
27280 }
27281 }
27282
Marat Dukhan801d2c22021-06-02 21:25:05 -070027283 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027284 TEST_REQUIRES_X86_SSE2;
27285 for (size_t k = 9; k < 16; k++) {
27286 GemmMicrokernelTester()
27287 .mr(2)
27288 .nr(4)
27289 .kr(8)
27290 .sr(1)
27291 .m(2)
27292 .n(4)
27293 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027294 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027295 }
27296 }
27297
Marat Dukhan801d2c22021-06-02 21:25:05 -070027298 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027299 TEST_REQUIRES_X86_SSE2;
27300 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027301 for (uint32_t n = 1; n <= 4; n++) {
27302 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027303 GemmMicrokernelTester()
27304 .mr(2)
27305 .nr(4)
27306 .kr(8)
27307 .sr(1)
27308 .m(m)
27309 .n(n)
27310 .k(k)
27311 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027312 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027313 }
27314 }
27315 }
27316 }
27317
Marat Dukhan801d2c22021-06-02 21:25:05 -070027318 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027319 TEST_REQUIRES_X86_SSE2;
27320 for (size_t k = 16; k <= 80; k += 8) {
27321 GemmMicrokernelTester()
27322 .mr(2)
27323 .nr(4)
27324 .kr(8)
27325 .sr(1)
27326 .m(2)
27327 .n(4)
27328 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027329 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027330 }
27331 }
27332
Marat Dukhan801d2c22021-06-02 21:25:05 -070027333 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027334 TEST_REQUIRES_X86_SSE2;
27335 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027336 for (uint32_t n = 1; n <= 4; n++) {
27337 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027338 GemmMicrokernelTester()
27339 .mr(2)
27340 .nr(4)
27341 .kr(8)
27342 .sr(1)
27343 .m(m)
27344 .n(n)
27345 .k(k)
27346 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027347 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027348 }
27349 }
27350 }
27351 }
27352
Marat Dukhan801d2c22021-06-02 21:25:05 -070027353 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027354 TEST_REQUIRES_X86_SSE2;
27355 for (uint32_t n = 5; n < 8; n++) {
27356 for (size_t k = 1; k <= 40; k += 9) {
27357 GemmMicrokernelTester()
27358 .mr(2)
27359 .nr(4)
27360 .kr(8)
27361 .sr(1)
27362 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027363 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070027364 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027365 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027366 }
27367 }
27368 }
27369
Marat Dukhan801d2c22021-06-02 21:25:05 -070027370 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027371 TEST_REQUIRES_X86_SSE2;
27372 for (uint32_t n = 5; n < 8; n++) {
27373 for (size_t k = 1; k <= 40; k += 9) {
27374 GemmMicrokernelTester()
27375 .mr(2)
27376 .nr(4)
27377 .kr(8)
27378 .sr(1)
27379 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027380 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070027381 .k(k)
27382 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027383 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027384 }
27385 }
27386 }
27387
Marat Dukhan801d2c22021-06-02 21:25:05 -070027388 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027389 TEST_REQUIRES_X86_SSE2;
27390 for (uint32_t n = 5; n < 8; n++) {
27391 for (size_t k = 1; k <= 40; k += 9) {
27392 for (uint32_t m = 1; m <= 2; m++) {
27393 GemmMicrokernelTester()
27394 .mr(2)
27395 .nr(4)
27396 .kr(8)
27397 .sr(1)
27398 .m(m)
27399 .n(n)
27400 .k(k)
27401 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027402 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027403 }
27404 }
27405 }
27406 }
27407
Marat Dukhan801d2c22021-06-02 21:25:05 -070027408 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027409 TEST_REQUIRES_X86_SSE2;
27410 for (uint32_t n = 8; n <= 12; n += 4) {
27411 for (size_t k = 1; k <= 40; k += 9) {
27412 GemmMicrokernelTester()
27413 .mr(2)
27414 .nr(4)
27415 .kr(8)
27416 .sr(1)
27417 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027418 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070027419 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027420 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027421 }
27422 }
27423 }
27424
Marat Dukhan801d2c22021-06-02 21:25:05 -070027425 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027426 TEST_REQUIRES_X86_SSE2;
27427 for (uint32_t n = 8; n <= 12; n += 4) {
27428 for (size_t k = 1; k <= 40; k += 9) {
27429 GemmMicrokernelTester()
27430 .mr(2)
27431 .nr(4)
27432 .kr(8)
27433 .sr(1)
27434 .m(2)
27435 .n(n)
27436 .k(k)
27437 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027438 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027439 }
27440 }
27441 }
27442
Marat Dukhan801d2c22021-06-02 21:25:05 -070027443 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027444 TEST_REQUIRES_X86_SSE2;
27445 for (uint32_t n = 8; n <= 12; n += 4) {
27446 for (size_t k = 1; k <= 40; k += 9) {
27447 for (uint32_t m = 1; m <= 2; m++) {
27448 GemmMicrokernelTester()
27449 .mr(2)
27450 .nr(4)
27451 .kr(8)
27452 .sr(1)
27453 .m(m)
27454 .n(n)
27455 .k(k)
27456 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027457 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027458 }
27459 }
27460 }
27461 }
27462
Marat Dukhan801d2c22021-06-02 21:25:05 -070027463 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, small_kernel) {
27464 TEST_REQUIRES_X86_SSE2;
27465 for (size_t k = 1; k <= 40; k += 9) {
27466 GemmMicrokernelTester()
27467 .mr(2)
27468 .nr(4)
27469 .kr(8)
27470 .sr(1)
27471 .m(2)
27472 .n(4)
27473 .k(k)
27474 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080027475 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070027476 }
27477 }
27478
27479 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, small_kernel_subtile) {
27480 TEST_REQUIRES_X86_SSE2;
27481 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027482 for (uint32_t n = 1; n <= 4; n++) {
27483 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070027484 GemmMicrokernelTester()
27485 .mr(2)
27486 .nr(4)
27487 .kr(8)
27488 .sr(1)
27489 .m(m)
27490 .n(n)
27491 .k(k)
27492 .ks(3)
27493 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027494 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070027495 }
27496 }
27497 }
27498 }
27499
27500 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_small_kernel) {
27501 TEST_REQUIRES_X86_SSE2;
27502 for (uint32_t n = 5; n < 8; n++) {
27503 for (size_t k = 1; k <= 40; k += 9) {
27504 GemmMicrokernelTester()
27505 .mr(2)
27506 .nr(4)
27507 .kr(8)
27508 .sr(1)
27509 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027510 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070027511 .k(k)
27512 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080027513 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070027514 }
27515 }
27516 }
27517
27518 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_small_kernel) {
27519 TEST_REQUIRES_X86_SSE2;
27520 for (uint32_t n = 8; n <= 12; n += 4) {
27521 for (size_t k = 1; k <= 40; k += 9) {
27522 GemmMicrokernelTester()
27523 .mr(2)
27524 .nr(4)
27525 .kr(8)
27526 .sr(1)
27527 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027528 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070027529 .k(k)
27530 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080027531 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070027532 }
27533 }
27534 }
27535
27536 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027537 TEST_REQUIRES_X86_SSE2;
27538 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027539 for (uint32_t n = 1; n <= 4; n++) {
27540 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027541 GemmMicrokernelTester()
27542 .mr(2)
27543 .nr(4)
27544 .kr(8)
27545 .sr(1)
27546 .m(m)
27547 .n(n)
27548 .k(k)
27549 .cm_stride(7)
27550 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027551 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027552 }
27553 }
27554 }
27555 }
27556
Marat Dukhan801d2c22021-06-02 21:25:05 -070027557 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, a_offset) {
27558 TEST_REQUIRES_X86_SSE2;
27559 for (size_t k = 1; k <= 40; k += 9) {
27560 GemmMicrokernelTester()
27561 .mr(2)
27562 .nr(4)
27563 .kr(8)
27564 .sr(1)
27565 .m(2)
27566 .n(4)
27567 .k(k)
27568 .ks(3)
27569 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080027570 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070027571 }
27572 }
27573
27574 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, zero) {
27575 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080027576 for (size_t k = 1; k <= 40; k += 9) {
27577 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070027578 GemmMicrokernelTester()
27579 .mr(2)
27580 .nr(4)
27581 .kr(8)
27582 .sr(1)
27583 .m(2)
27584 .n(4)
27585 .k(k)
27586 .ks(3)
27587 .a_offset(83)
27588 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080027589 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070027590 }
27591 }
27592 }
27593
27594 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027595 TEST_REQUIRES_X86_SSE2;
27596 GemmMicrokernelTester()
27597 .mr(2)
27598 .nr(4)
27599 .kr(8)
27600 .sr(1)
27601 .m(2)
27602 .n(4)
27603 .k(8)
27604 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027605 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027606 }
27607
Marat Dukhan801d2c22021-06-02 21:25:05 -070027608 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027609 TEST_REQUIRES_X86_SSE2;
27610 GemmMicrokernelTester()
27611 .mr(2)
27612 .nr(4)
27613 .kr(8)
27614 .sr(1)
27615 .m(2)
27616 .n(4)
27617 .k(8)
27618 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027619 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027620 }
27621
Marat Dukhan801d2c22021-06-02 21:25:05 -070027622 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027623 TEST_REQUIRES_X86_SSE2;
27624 GemmMicrokernelTester()
27625 .mr(2)
27626 .nr(4)
27627 .kr(8)
27628 .sr(1)
27629 .m(2)
27630 .n(4)
27631 .k(8)
27632 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027633 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027634 }
27635#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
27636
27637
27638#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070027639 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027640 TEST_REQUIRES_X86_SSSE3;
27641 GemmMicrokernelTester()
27642 .mr(3)
27643 .nr(4)
27644 .kr(8)
27645 .sr(1)
27646 .m(3)
27647 .n(4)
27648 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080027649 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027650 }
27651
Marat Dukhan801d2c22021-06-02 21:25:05 -070027652 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027653 TEST_REQUIRES_X86_SSSE3;
27654 GemmMicrokernelTester()
27655 .mr(3)
27656 .nr(4)
27657 .kr(8)
27658 .sr(1)
27659 .m(3)
27660 .n(4)
27661 .k(8)
27662 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027663 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027664 }
27665
Marat Dukhan801d2c22021-06-02 21:25:05 -070027666 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027667 TEST_REQUIRES_X86_SSSE3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080027668 for (uint32_t n = 1; n <= 4; n++) {
27669 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027670 GemmMicrokernelTester()
27671 .mr(3)
27672 .nr(4)
27673 .kr(8)
27674 .sr(1)
27675 .m(m)
27676 .n(n)
27677 .k(8)
27678 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027679 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027680 }
27681 }
27682 }
27683
Marat Dukhan801d2c22021-06-02 21:25:05 -070027684 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027685 TEST_REQUIRES_X86_SSSE3;
27686 for (uint32_t m = 1; m <= 3; m++) {
27687 GemmMicrokernelTester()
27688 .mr(3)
27689 .nr(4)
27690 .kr(8)
27691 .sr(1)
27692 .m(m)
27693 .n(4)
27694 .k(8)
27695 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027696 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027697 }
27698 }
27699
Marat Dukhan801d2c22021-06-02 21:25:05 -070027700 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027701 TEST_REQUIRES_X86_SSSE3;
27702 for (uint32_t n = 1; n <= 4; n++) {
27703 GemmMicrokernelTester()
27704 .mr(3)
27705 .nr(4)
27706 .kr(8)
27707 .sr(1)
27708 .m(3)
27709 .n(n)
27710 .k(8)
27711 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027712 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027713 }
27714 }
27715
Marat Dukhan801d2c22021-06-02 21:25:05 -070027716 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027717 TEST_REQUIRES_X86_SSSE3;
27718 for (size_t k = 1; k < 8; k++) {
27719 GemmMicrokernelTester()
27720 .mr(3)
27721 .nr(4)
27722 .kr(8)
27723 .sr(1)
27724 .m(3)
27725 .n(4)
27726 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027727 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027728 }
27729 }
27730
Marat Dukhan801d2c22021-06-02 21:25:05 -070027731 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027732 TEST_REQUIRES_X86_SSSE3;
27733 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027734 for (uint32_t n = 1; n <= 4; n++) {
27735 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027736 GemmMicrokernelTester()
27737 .mr(3)
27738 .nr(4)
27739 .kr(8)
27740 .sr(1)
27741 .m(m)
27742 .n(n)
27743 .k(k)
27744 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027745 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027746 }
27747 }
27748 }
27749 }
27750
Marat Dukhan801d2c22021-06-02 21:25:05 -070027751 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027752 TEST_REQUIRES_X86_SSSE3;
27753 for (size_t k = 9; k < 16; k++) {
27754 GemmMicrokernelTester()
27755 .mr(3)
27756 .nr(4)
27757 .kr(8)
27758 .sr(1)
27759 .m(3)
27760 .n(4)
27761 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027762 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027763 }
27764 }
27765
Marat Dukhan801d2c22021-06-02 21:25:05 -070027766 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027767 TEST_REQUIRES_X86_SSSE3;
27768 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027769 for (uint32_t n = 1; n <= 4; n++) {
27770 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027771 GemmMicrokernelTester()
27772 .mr(3)
27773 .nr(4)
27774 .kr(8)
27775 .sr(1)
27776 .m(m)
27777 .n(n)
27778 .k(k)
27779 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027780 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027781 }
27782 }
27783 }
27784 }
27785
Marat Dukhan801d2c22021-06-02 21:25:05 -070027786 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027787 TEST_REQUIRES_X86_SSSE3;
27788 for (size_t k = 16; k <= 80; k += 8) {
27789 GemmMicrokernelTester()
27790 .mr(3)
27791 .nr(4)
27792 .kr(8)
27793 .sr(1)
27794 .m(3)
27795 .n(4)
27796 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027797 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027798 }
27799 }
27800
Marat Dukhan801d2c22021-06-02 21:25:05 -070027801 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027802 TEST_REQUIRES_X86_SSSE3;
27803 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027804 for (uint32_t n = 1; n <= 4; n++) {
27805 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027806 GemmMicrokernelTester()
27807 .mr(3)
27808 .nr(4)
27809 .kr(8)
27810 .sr(1)
27811 .m(m)
27812 .n(n)
27813 .k(k)
27814 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027815 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027816 }
27817 }
27818 }
27819 }
27820
Marat Dukhan801d2c22021-06-02 21:25:05 -070027821 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027822 TEST_REQUIRES_X86_SSSE3;
27823 for (uint32_t n = 5; n < 8; n++) {
27824 for (size_t k = 1; k <= 40; k += 9) {
27825 GemmMicrokernelTester()
27826 .mr(3)
27827 .nr(4)
27828 .kr(8)
27829 .sr(1)
27830 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027831 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070027832 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027833 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027834 }
27835 }
27836 }
27837
Marat Dukhan801d2c22021-06-02 21:25:05 -070027838 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027839 TEST_REQUIRES_X86_SSSE3;
27840 for (uint32_t n = 5; n < 8; n++) {
27841 for (size_t k = 1; k <= 40; k += 9) {
27842 GemmMicrokernelTester()
27843 .mr(3)
27844 .nr(4)
27845 .kr(8)
27846 .sr(1)
27847 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027848 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070027849 .k(k)
27850 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027851 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027852 }
27853 }
27854 }
27855
Marat Dukhan801d2c22021-06-02 21:25:05 -070027856 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027857 TEST_REQUIRES_X86_SSSE3;
27858 for (uint32_t n = 5; n < 8; n++) {
27859 for (size_t k = 1; k <= 40; k += 9) {
27860 for (uint32_t m = 1; m <= 3; m++) {
27861 GemmMicrokernelTester()
27862 .mr(3)
27863 .nr(4)
27864 .kr(8)
27865 .sr(1)
27866 .m(m)
27867 .n(n)
27868 .k(k)
27869 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027870 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027871 }
27872 }
27873 }
27874 }
27875
Marat Dukhan801d2c22021-06-02 21:25:05 -070027876 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027877 TEST_REQUIRES_X86_SSSE3;
27878 for (uint32_t n = 8; n <= 12; n += 4) {
27879 for (size_t k = 1; k <= 40; k += 9) {
27880 GemmMicrokernelTester()
27881 .mr(3)
27882 .nr(4)
27883 .kr(8)
27884 .sr(1)
27885 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027886 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070027887 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027888 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027889 }
27890 }
27891 }
27892
Marat Dukhan801d2c22021-06-02 21:25:05 -070027893 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027894 TEST_REQUIRES_X86_SSSE3;
27895 for (uint32_t n = 8; n <= 12; n += 4) {
27896 for (size_t k = 1; k <= 40; k += 9) {
27897 GemmMicrokernelTester()
27898 .mr(3)
27899 .nr(4)
27900 .kr(8)
27901 .sr(1)
27902 .m(3)
27903 .n(n)
27904 .k(k)
27905 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027906 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027907 }
27908 }
27909 }
27910
Marat Dukhan801d2c22021-06-02 21:25:05 -070027911 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027912 TEST_REQUIRES_X86_SSSE3;
27913 for (uint32_t n = 8; n <= 12; n += 4) {
27914 for (size_t k = 1; k <= 40; k += 9) {
27915 for (uint32_t m = 1; m <= 3; m++) {
27916 GemmMicrokernelTester()
27917 .mr(3)
27918 .nr(4)
27919 .kr(8)
27920 .sr(1)
27921 .m(m)
27922 .n(n)
27923 .k(k)
27924 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027925 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027926 }
27927 }
27928 }
27929 }
27930
Marat Dukhan801d2c22021-06-02 21:25:05 -070027931 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, small_kernel) {
27932 TEST_REQUIRES_X86_SSSE3;
27933 for (size_t k = 1; k <= 40; k += 9) {
27934 GemmMicrokernelTester()
27935 .mr(3)
27936 .nr(4)
27937 .kr(8)
27938 .sr(1)
27939 .m(3)
27940 .n(4)
27941 .k(k)
27942 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080027943 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070027944 }
27945 }
27946
27947 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, small_kernel_subtile) {
27948 TEST_REQUIRES_X86_SSSE3;
27949 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027950 for (uint32_t n = 1; n <= 4; n++) {
27951 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070027952 GemmMicrokernelTester()
27953 .mr(3)
27954 .nr(4)
27955 .kr(8)
27956 .sr(1)
27957 .m(m)
27958 .n(n)
27959 .k(k)
27960 .ks(3)
27961 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027962 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070027963 }
27964 }
27965 }
27966 }
27967
27968 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, n_gt_4_small_kernel) {
27969 TEST_REQUIRES_X86_SSSE3;
27970 for (uint32_t n = 5; n < 8; n++) {
27971 for (size_t k = 1; k <= 40; k += 9) {
27972 GemmMicrokernelTester()
27973 .mr(3)
27974 .nr(4)
27975 .kr(8)
27976 .sr(1)
27977 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027978 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070027979 .k(k)
27980 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080027981 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070027982 }
27983 }
27984 }
27985
27986 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, n_div_4_small_kernel) {
27987 TEST_REQUIRES_X86_SSSE3;
27988 for (uint32_t n = 8; n <= 12; n += 4) {
27989 for (size_t k = 1; k <= 40; k += 9) {
27990 GemmMicrokernelTester()
27991 .mr(3)
27992 .nr(4)
27993 .kr(8)
27994 .sr(1)
27995 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027996 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070027997 .k(k)
27998 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080027999 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070028000 }
28001 }
28002 }
28003
28004 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028005 TEST_REQUIRES_X86_SSSE3;
28006 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028007 for (uint32_t n = 1; n <= 4; n++) {
28008 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028009 GemmMicrokernelTester()
28010 .mr(3)
28011 .nr(4)
28012 .kr(8)
28013 .sr(1)
28014 .m(m)
28015 .n(n)
28016 .k(k)
28017 .cm_stride(7)
28018 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028019 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028020 }
28021 }
28022 }
28023 }
28024
Marat Dukhan801d2c22021-06-02 21:25:05 -070028025 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, a_offset) {
28026 TEST_REQUIRES_X86_SSSE3;
28027 for (size_t k = 1; k <= 40; k += 9) {
28028 GemmMicrokernelTester()
28029 .mr(3)
28030 .nr(4)
28031 .kr(8)
28032 .sr(1)
28033 .m(3)
28034 .n(4)
28035 .k(k)
28036 .ks(3)
28037 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080028038 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070028039 }
28040 }
28041
28042 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, zero) {
28043 TEST_REQUIRES_X86_SSSE3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080028044 for (size_t k = 1; k <= 40; k += 9) {
28045 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070028046 GemmMicrokernelTester()
28047 .mr(3)
28048 .nr(4)
28049 .kr(8)
28050 .sr(1)
28051 .m(3)
28052 .n(4)
28053 .k(k)
28054 .ks(3)
28055 .a_offset(127)
28056 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080028057 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070028058 }
28059 }
28060 }
28061
28062 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028063 TEST_REQUIRES_X86_SSSE3;
28064 GemmMicrokernelTester()
28065 .mr(3)
28066 .nr(4)
28067 .kr(8)
28068 .sr(1)
28069 .m(3)
28070 .n(4)
28071 .k(8)
28072 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080028073 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028074 }
28075
Marat Dukhan801d2c22021-06-02 21:25:05 -070028076 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028077 TEST_REQUIRES_X86_SSSE3;
28078 GemmMicrokernelTester()
28079 .mr(3)
28080 .nr(4)
28081 .kr(8)
28082 .sr(1)
28083 .m(3)
28084 .n(4)
28085 .k(8)
28086 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080028087 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028088 }
28089
Marat Dukhan801d2c22021-06-02 21:25:05 -070028090 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028091 TEST_REQUIRES_X86_SSSE3;
28092 GemmMicrokernelTester()
28093 .mr(3)
28094 .nr(4)
28095 .kr(8)
28096 .sr(1)
28097 .m(3)
28098 .n(4)
28099 .k(8)
28100 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080028101 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028102 }
28103#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
28104
28105
28106#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070028107 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028108 TEST_REQUIRES_X86_SSE41;
28109 GemmMicrokernelTester()
28110 .mr(3)
28111 .nr(4)
28112 .kr(8)
28113 .sr(1)
28114 .m(3)
28115 .n(4)
28116 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080028117 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028118 }
28119
Marat Dukhan801d2c22021-06-02 21:25:05 -070028120 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028121 TEST_REQUIRES_X86_SSE41;
28122 GemmMicrokernelTester()
28123 .mr(3)
28124 .nr(4)
28125 .kr(8)
28126 .sr(1)
28127 .m(3)
28128 .n(4)
28129 .k(8)
28130 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080028131 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028132 }
28133
Marat Dukhan801d2c22021-06-02 21:25:05 -070028134 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028135 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080028136 for (uint32_t n = 1; n <= 4; n++) {
28137 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028138 GemmMicrokernelTester()
28139 .mr(3)
28140 .nr(4)
28141 .kr(8)
28142 .sr(1)
28143 .m(m)
28144 .n(n)
28145 .k(8)
28146 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028147 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028148 }
28149 }
28150 }
28151
Marat Dukhan801d2c22021-06-02 21:25:05 -070028152 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028153 TEST_REQUIRES_X86_SSE41;
28154 for (uint32_t m = 1; m <= 3; m++) {
28155 GemmMicrokernelTester()
28156 .mr(3)
28157 .nr(4)
28158 .kr(8)
28159 .sr(1)
28160 .m(m)
28161 .n(4)
28162 .k(8)
28163 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028164 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028165 }
28166 }
28167
Marat Dukhan801d2c22021-06-02 21:25:05 -070028168 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028169 TEST_REQUIRES_X86_SSE41;
28170 for (uint32_t n = 1; n <= 4; n++) {
28171 GemmMicrokernelTester()
28172 .mr(3)
28173 .nr(4)
28174 .kr(8)
28175 .sr(1)
28176 .m(3)
28177 .n(n)
28178 .k(8)
28179 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028180 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028181 }
28182 }
28183
Marat Dukhan801d2c22021-06-02 21:25:05 -070028184 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028185 TEST_REQUIRES_X86_SSE41;
28186 for (size_t k = 1; k < 8; k++) {
28187 GemmMicrokernelTester()
28188 .mr(3)
28189 .nr(4)
28190 .kr(8)
28191 .sr(1)
28192 .m(3)
28193 .n(4)
28194 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028195 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028196 }
28197 }
28198
Marat Dukhan801d2c22021-06-02 21:25:05 -070028199 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028200 TEST_REQUIRES_X86_SSE41;
28201 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028202 for (uint32_t n = 1; n <= 4; n++) {
28203 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028204 GemmMicrokernelTester()
28205 .mr(3)
28206 .nr(4)
28207 .kr(8)
28208 .sr(1)
28209 .m(m)
28210 .n(n)
28211 .k(k)
28212 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028213 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028214 }
28215 }
28216 }
28217 }
28218
Marat Dukhan801d2c22021-06-02 21:25:05 -070028219 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028220 TEST_REQUIRES_X86_SSE41;
28221 for (size_t k = 9; k < 16; k++) {
28222 GemmMicrokernelTester()
28223 .mr(3)
28224 .nr(4)
28225 .kr(8)
28226 .sr(1)
28227 .m(3)
28228 .n(4)
28229 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028230 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028231 }
28232 }
28233
Marat Dukhan801d2c22021-06-02 21:25:05 -070028234 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028235 TEST_REQUIRES_X86_SSE41;
28236 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028237 for (uint32_t n = 1; n <= 4; n++) {
28238 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028239 GemmMicrokernelTester()
28240 .mr(3)
28241 .nr(4)
28242 .kr(8)
28243 .sr(1)
28244 .m(m)
28245 .n(n)
28246 .k(k)
28247 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028248 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028249 }
28250 }
28251 }
28252 }
28253
Marat Dukhan801d2c22021-06-02 21:25:05 -070028254 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028255 TEST_REQUIRES_X86_SSE41;
28256 for (size_t k = 16; k <= 80; k += 8) {
28257 GemmMicrokernelTester()
28258 .mr(3)
28259 .nr(4)
28260 .kr(8)
28261 .sr(1)
28262 .m(3)
28263 .n(4)
28264 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028265 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028266 }
28267 }
28268
Marat Dukhan801d2c22021-06-02 21:25:05 -070028269 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028270 TEST_REQUIRES_X86_SSE41;
28271 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028272 for (uint32_t n = 1; n <= 4; n++) {
28273 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028274 GemmMicrokernelTester()
28275 .mr(3)
28276 .nr(4)
28277 .kr(8)
28278 .sr(1)
28279 .m(m)
28280 .n(n)
28281 .k(k)
28282 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028283 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028284 }
28285 }
28286 }
28287 }
28288
Marat Dukhan801d2c22021-06-02 21:25:05 -070028289 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028290 TEST_REQUIRES_X86_SSE41;
28291 for (uint32_t n = 5; n < 8; n++) {
28292 for (size_t k = 1; k <= 40; k += 9) {
28293 GemmMicrokernelTester()
28294 .mr(3)
28295 .nr(4)
28296 .kr(8)
28297 .sr(1)
28298 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028299 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070028300 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028301 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028302 }
28303 }
28304 }
28305
Marat Dukhan801d2c22021-06-02 21:25:05 -070028306 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028307 TEST_REQUIRES_X86_SSE41;
28308 for (uint32_t n = 5; n < 8; n++) {
28309 for (size_t k = 1; k <= 40; k += 9) {
28310 GemmMicrokernelTester()
28311 .mr(3)
28312 .nr(4)
28313 .kr(8)
28314 .sr(1)
28315 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028316 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070028317 .k(k)
28318 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080028319 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028320 }
28321 }
28322 }
28323
Marat Dukhan801d2c22021-06-02 21:25:05 -070028324 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028325 TEST_REQUIRES_X86_SSE41;
28326 for (uint32_t n = 5; n < 8; n++) {
28327 for (size_t k = 1; k <= 40; k += 9) {
28328 for (uint32_t m = 1; m <= 3; m++) {
28329 GemmMicrokernelTester()
28330 .mr(3)
28331 .nr(4)
28332 .kr(8)
28333 .sr(1)
28334 .m(m)
28335 .n(n)
28336 .k(k)
28337 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028338 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028339 }
28340 }
28341 }
28342 }
28343
Marat Dukhan801d2c22021-06-02 21:25:05 -070028344 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028345 TEST_REQUIRES_X86_SSE41;
28346 for (uint32_t n = 8; n <= 12; n += 4) {
28347 for (size_t k = 1; k <= 40; k += 9) {
28348 GemmMicrokernelTester()
28349 .mr(3)
28350 .nr(4)
28351 .kr(8)
28352 .sr(1)
28353 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028354 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070028355 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028356 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028357 }
28358 }
28359 }
28360
Marat Dukhan801d2c22021-06-02 21:25:05 -070028361 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028362 TEST_REQUIRES_X86_SSE41;
28363 for (uint32_t n = 8; n <= 12; n += 4) {
28364 for (size_t k = 1; k <= 40; k += 9) {
28365 GemmMicrokernelTester()
28366 .mr(3)
28367 .nr(4)
28368 .kr(8)
28369 .sr(1)
28370 .m(3)
28371 .n(n)
28372 .k(k)
28373 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080028374 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028375 }
28376 }
28377 }
28378
Marat Dukhan801d2c22021-06-02 21:25:05 -070028379 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028380 TEST_REQUIRES_X86_SSE41;
28381 for (uint32_t n = 8; n <= 12; n += 4) {
28382 for (size_t k = 1; k <= 40; k += 9) {
28383 for (uint32_t m = 1; m <= 3; m++) {
28384 GemmMicrokernelTester()
28385 .mr(3)
28386 .nr(4)
28387 .kr(8)
28388 .sr(1)
28389 .m(m)
28390 .n(n)
28391 .k(k)
28392 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028393 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028394 }
28395 }
28396 }
28397 }
28398
Marat Dukhan801d2c22021-06-02 21:25:05 -070028399 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, small_kernel) {
28400 TEST_REQUIRES_X86_SSE41;
28401 for (size_t k = 1; k <= 40; k += 9) {
28402 GemmMicrokernelTester()
28403 .mr(3)
28404 .nr(4)
28405 .kr(8)
28406 .sr(1)
28407 .m(3)
28408 .n(4)
28409 .k(k)
28410 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080028411 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070028412 }
28413 }
28414
28415 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, small_kernel_subtile) {
28416 TEST_REQUIRES_X86_SSE41;
28417 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028418 for (uint32_t n = 1; n <= 4; n++) {
28419 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070028420 GemmMicrokernelTester()
28421 .mr(3)
28422 .nr(4)
28423 .kr(8)
28424 .sr(1)
28425 .m(m)
28426 .n(n)
28427 .k(k)
28428 .ks(3)
28429 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028430 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070028431 }
28432 }
28433 }
28434 }
28435
28436 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4_small_kernel) {
28437 TEST_REQUIRES_X86_SSE41;
28438 for (uint32_t n = 5; n < 8; n++) {
28439 for (size_t k = 1; k <= 40; k += 9) {
28440 GemmMicrokernelTester()
28441 .mr(3)
28442 .nr(4)
28443 .kr(8)
28444 .sr(1)
28445 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028446 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070028447 .k(k)
28448 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080028449 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070028450 }
28451 }
28452 }
28453
28454 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4_small_kernel) {
28455 TEST_REQUIRES_X86_SSE41;
28456 for (uint32_t n = 8; n <= 12; n += 4) {
28457 for (size_t k = 1; k <= 40; k += 9) {
28458 GemmMicrokernelTester()
28459 .mr(3)
28460 .nr(4)
28461 .kr(8)
28462 .sr(1)
28463 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028464 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070028465 .k(k)
28466 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080028467 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070028468 }
28469 }
28470 }
28471
28472 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028473 TEST_REQUIRES_X86_SSE41;
28474 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028475 for (uint32_t n = 1; n <= 4; n++) {
28476 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028477 GemmMicrokernelTester()
28478 .mr(3)
28479 .nr(4)
28480 .kr(8)
28481 .sr(1)
28482 .m(m)
28483 .n(n)
28484 .k(k)
28485 .cm_stride(7)
28486 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028487 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028488 }
28489 }
28490 }
28491 }
28492
Marat Dukhan801d2c22021-06-02 21:25:05 -070028493 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, a_offset) {
28494 TEST_REQUIRES_X86_SSE41;
28495 for (size_t k = 1; k <= 40; k += 9) {
28496 GemmMicrokernelTester()
28497 .mr(3)
28498 .nr(4)
28499 .kr(8)
28500 .sr(1)
28501 .m(3)
28502 .n(4)
28503 .k(k)
28504 .ks(3)
28505 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080028506 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070028507 }
28508 }
28509
28510 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, zero) {
28511 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080028512 for (size_t k = 1; k <= 40; k += 9) {
28513 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070028514 GemmMicrokernelTester()
28515 .mr(3)
28516 .nr(4)
28517 .kr(8)
28518 .sr(1)
28519 .m(3)
28520 .n(4)
28521 .k(k)
28522 .ks(3)
28523 .a_offset(127)
28524 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080028525 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070028526 }
28527 }
28528 }
28529
28530 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028531 TEST_REQUIRES_X86_SSE41;
28532 GemmMicrokernelTester()
28533 .mr(3)
28534 .nr(4)
28535 .kr(8)
28536 .sr(1)
28537 .m(3)
28538 .n(4)
28539 .k(8)
28540 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080028541 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028542 }
28543
Marat Dukhan801d2c22021-06-02 21:25:05 -070028544 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028545 TEST_REQUIRES_X86_SSE41;
28546 GemmMicrokernelTester()
28547 .mr(3)
28548 .nr(4)
28549 .kr(8)
28550 .sr(1)
28551 .m(3)
28552 .n(4)
28553 .k(8)
28554 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080028555 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028556 }
28557
Marat Dukhan801d2c22021-06-02 21:25:05 -070028558 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028559 TEST_REQUIRES_X86_SSE41;
28560 GemmMicrokernelTester()
28561 .mr(3)
28562 .nr(4)
28563 .kr(8)
28564 .sr(1)
28565 .m(3)
28566 .n(4)
28567 .k(8)
28568 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080028569 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028570 }
28571#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
28572
28573
28574#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070028575 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028576 TEST_REQUIRES_X86_AVX;
28577 GemmMicrokernelTester()
28578 .mr(1)
28579 .nr(4)
28580 .kr(8)
28581 .sr(1)
28582 .m(1)
28583 .n(4)
28584 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080028585 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028586 }
28587
Marat Dukhan801d2c22021-06-02 21:25:05 -070028588 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028589 TEST_REQUIRES_X86_AVX;
28590 GemmMicrokernelTester()
28591 .mr(1)
28592 .nr(4)
28593 .kr(8)
28594 .sr(1)
28595 .m(1)
28596 .n(4)
28597 .k(8)
28598 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080028599 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028600 }
28601
Marat Dukhan801d2c22021-06-02 21:25:05 -070028602 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028603 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080028604 for (uint32_t n = 1; n <= 4; n++) {
28605 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028606 GemmMicrokernelTester()
28607 .mr(1)
28608 .nr(4)
28609 .kr(8)
28610 .sr(1)
28611 .m(m)
28612 .n(n)
28613 .k(8)
28614 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028615 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028616 }
28617 }
28618 }
28619
Marat Dukhan801d2c22021-06-02 21:25:05 -070028620 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028621 TEST_REQUIRES_X86_AVX;
28622 for (uint32_t m = 1; m <= 1; m++) {
28623 GemmMicrokernelTester()
28624 .mr(1)
28625 .nr(4)
28626 .kr(8)
28627 .sr(1)
28628 .m(m)
28629 .n(4)
28630 .k(8)
28631 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028632 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028633 }
28634 }
28635
Marat Dukhan801d2c22021-06-02 21:25:05 -070028636 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028637 TEST_REQUIRES_X86_AVX;
28638 for (uint32_t n = 1; n <= 4; n++) {
28639 GemmMicrokernelTester()
28640 .mr(1)
28641 .nr(4)
28642 .kr(8)
28643 .sr(1)
28644 .m(1)
28645 .n(n)
28646 .k(8)
28647 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028648 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028649 }
28650 }
28651
Marat Dukhan801d2c22021-06-02 21:25:05 -070028652 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028653 TEST_REQUIRES_X86_AVX;
28654 for (size_t k = 1; k < 8; k++) {
28655 GemmMicrokernelTester()
28656 .mr(1)
28657 .nr(4)
28658 .kr(8)
28659 .sr(1)
28660 .m(1)
28661 .n(4)
28662 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028663 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028664 }
28665 }
28666
Marat Dukhan801d2c22021-06-02 21:25:05 -070028667 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028668 TEST_REQUIRES_X86_AVX;
28669 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028670 for (uint32_t n = 1; n <= 4; n++) {
28671 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028672 GemmMicrokernelTester()
28673 .mr(1)
28674 .nr(4)
28675 .kr(8)
28676 .sr(1)
28677 .m(m)
28678 .n(n)
28679 .k(k)
28680 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028681 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028682 }
28683 }
28684 }
28685 }
28686
Marat Dukhan801d2c22021-06-02 21:25:05 -070028687 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028688 TEST_REQUIRES_X86_AVX;
28689 for (size_t k = 9; k < 16; k++) {
28690 GemmMicrokernelTester()
28691 .mr(1)
28692 .nr(4)
28693 .kr(8)
28694 .sr(1)
28695 .m(1)
28696 .n(4)
28697 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028698 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028699 }
28700 }
28701
Marat Dukhan801d2c22021-06-02 21:25:05 -070028702 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028703 TEST_REQUIRES_X86_AVX;
28704 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028705 for (uint32_t n = 1; n <= 4; n++) {
28706 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028707 GemmMicrokernelTester()
28708 .mr(1)
28709 .nr(4)
28710 .kr(8)
28711 .sr(1)
28712 .m(m)
28713 .n(n)
28714 .k(k)
28715 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028716 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028717 }
28718 }
28719 }
28720 }
28721
Marat Dukhan801d2c22021-06-02 21:25:05 -070028722 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028723 TEST_REQUIRES_X86_AVX;
28724 for (size_t k = 16; k <= 80; k += 8) {
28725 GemmMicrokernelTester()
28726 .mr(1)
28727 .nr(4)
28728 .kr(8)
28729 .sr(1)
28730 .m(1)
28731 .n(4)
28732 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028733 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028734 }
28735 }
28736
Marat Dukhan801d2c22021-06-02 21:25:05 -070028737 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028738 TEST_REQUIRES_X86_AVX;
28739 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028740 for (uint32_t n = 1; n <= 4; n++) {
28741 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028742 GemmMicrokernelTester()
28743 .mr(1)
28744 .nr(4)
28745 .kr(8)
28746 .sr(1)
28747 .m(m)
28748 .n(n)
28749 .k(k)
28750 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028751 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028752 }
28753 }
28754 }
28755 }
28756
Marat Dukhan801d2c22021-06-02 21:25:05 -070028757 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028758 TEST_REQUIRES_X86_AVX;
28759 for (uint32_t n = 5; n < 8; n++) {
28760 for (size_t k = 1; k <= 40; k += 9) {
28761 GemmMicrokernelTester()
28762 .mr(1)
28763 .nr(4)
28764 .kr(8)
28765 .sr(1)
28766 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028767 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070028768 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028769 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028770 }
28771 }
28772 }
28773
Marat Dukhan801d2c22021-06-02 21:25:05 -070028774 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028775 TEST_REQUIRES_X86_AVX;
28776 for (uint32_t n = 5; n < 8; n++) {
28777 for (size_t k = 1; k <= 40; k += 9) {
28778 GemmMicrokernelTester()
28779 .mr(1)
28780 .nr(4)
28781 .kr(8)
28782 .sr(1)
28783 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028784 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070028785 .k(k)
28786 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080028787 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028788 }
28789 }
28790 }
28791
Marat Dukhan801d2c22021-06-02 21:25:05 -070028792 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028793 TEST_REQUIRES_X86_AVX;
28794 for (uint32_t n = 5; n < 8; n++) {
28795 for (size_t k = 1; k <= 40; k += 9) {
28796 for (uint32_t m = 1; m <= 1; m++) {
28797 GemmMicrokernelTester()
28798 .mr(1)
28799 .nr(4)
28800 .kr(8)
28801 .sr(1)
28802 .m(m)
28803 .n(n)
28804 .k(k)
28805 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028806 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028807 }
28808 }
28809 }
28810 }
28811
Marat Dukhan801d2c22021-06-02 21:25:05 -070028812 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028813 TEST_REQUIRES_X86_AVX;
28814 for (uint32_t n = 8; n <= 12; n += 4) {
28815 for (size_t k = 1; k <= 40; k += 9) {
28816 GemmMicrokernelTester()
28817 .mr(1)
28818 .nr(4)
28819 .kr(8)
28820 .sr(1)
28821 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028822 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070028823 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028824 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028825 }
28826 }
28827 }
28828
Marat Dukhan801d2c22021-06-02 21:25:05 -070028829 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028830 TEST_REQUIRES_X86_AVX;
28831 for (uint32_t n = 8; n <= 12; n += 4) {
28832 for (size_t k = 1; k <= 40; k += 9) {
28833 GemmMicrokernelTester()
28834 .mr(1)
28835 .nr(4)
28836 .kr(8)
28837 .sr(1)
28838 .m(1)
28839 .n(n)
28840 .k(k)
28841 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080028842 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028843 }
28844 }
28845 }
28846
Marat Dukhan801d2c22021-06-02 21:25:05 -070028847 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028848 TEST_REQUIRES_X86_AVX;
28849 for (uint32_t n = 8; n <= 12; n += 4) {
28850 for (size_t k = 1; k <= 40; k += 9) {
28851 for (uint32_t m = 1; m <= 1; m++) {
28852 GemmMicrokernelTester()
28853 .mr(1)
28854 .nr(4)
28855 .kr(8)
28856 .sr(1)
28857 .m(m)
28858 .n(n)
28859 .k(k)
28860 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028861 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028862 }
28863 }
28864 }
28865 }
28866
Marat Dukhan801d2c22021-06-02 21:25:05 -070028867 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, small_kernel) {
28868 TEST_REQUIRES_X86_AVX;
28869 for (size_t k = 1; k <= 40; k += 9) {
28870 GemmMicrokernelTester()
28871 .mr(1)
28872 .nr(4)
28873 .kr(8)
28874 .sr(1)
28875 .m(1)
28876 .n(4)
28877 .k(k)
28878 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080028879 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070028880 }
28881 }
28882
28883 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, small_kernel_subtile) {
28884 TEST_REQUIRES_X86_AVX;
28885 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028886 for (uint32_t n = 1; n <= 4; n++) {
28887 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070028888 GemmMicrokernelTester()
28889 .mr(1)
28890 .nr(4)
28891 .kr(8)
28892 .sr(1)
28893 .m(m)
28894 .n(n)
28895 .k(k)
28896 .ks(3)
28897 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028898 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070028899 }
28900 }
28901 }
28902 }
28903
28904 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_small_kernel) {
28905 TEST_REQUIRES_X86_AVX;
28906 for (uint32_t n = 5; n < 8; n++) {
28907 for (size_t k = 1; k <= 40; k += 9) {
28908 GemmMicrokernelTester()
28909 .mr(1)
28910 .nr(4)
28911 .kr(8)
28912 .sr(1)
28913 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028914 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070028915 .k(k)
28916 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080028917 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070028918 }
28919 }
28920 }
28921
28922 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_small_kernel) {
28923 TEST_REQUIRES_X86_AVX;
28924 for (uint32_t n = 8; n <= 12; n += 4) {
28925 for (size_t k = 1; k <= 40; k += 9) {
28926 GemmMicrokernelTester()
28927 .mr(1)
28928 .nr(4)
28929 .kr(8)
28930 .sr(1)
28931 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028932 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070028933 .k(k)
28934 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080028935 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070028936 }
28937 }
28938 }
28939
28940 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028941 TEST_REQUIRES_X86_AVX;
28942 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028943 for (uint32_t n = 1; n <= 4; n++) {
28944 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028945 GemmMicrokernelTester()
28946 .mr(1)
28947 .nr(4)
28948 .kr(8)
28949 .sr(1)
28950 .m(m)
28951 .n(n)
28952 .k(k)
28953 .cm_stride(7)
28954 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028955 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028956 }
28957 }
28958 }
28959 }
28960
Marat Dukhan801d2c22021-06-02 21:25:05 -070028961 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, a_offset) {
28962 TEST_REQUIRES_X86_AVX;
28963 for (size_t k = 1; k <= 40; k += 9) {
28964 GemmMicrokernelTester()
28965 .mr(1)
28966 .nr(4)
28967 .kr(8)
28968 .sr(1)
28969 .m(1)
28970 .n(4)
28971 .k(k)
28972 .ks(3)
28973 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080028974 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070028975 }
28976 }
28977
28978 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, zero) {
28979 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080028980 for (size_t k = 1; k <= 40; k += 9) {
28981 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070028982 GemmMicrokernelTester()
28983 .mr(1)
28984 .nr(4)
28985 .kr(8)
28986 .sr(1)
28987 .m(1)
28988 .n(4)
28989 .k(k)
28990 .ks(3)
28991 .a_offset(43)
28992 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080028993 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070028994 }
28995 }
28996 }
28997
28998 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028999 TEST_REQUIRES_X86_AVX;
29000 GemmMicrokernelTester()
29001 .mr(1)
29002 .nr(4)
29003 .kr(8)
29004 .sr(1)
29005 .m(1)
29006 .n(4)
29007 .k(8)
29008 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080029009 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029010 }
29011
Marat Dukhan801d2c22021-06-02 21:25:05 -070029012 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029013 TEST_REQUIRES_X86_AVX;
29014 GemmMicrokernelTester()
29015 .mr(1)
29016 .nr(4)
29017 .kr(8)
29018 .sr(1)
29019 .m(1)
29020 .n(4)
29021 .k(8)
29022 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080029023 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029024 }
29025
Marat Dukhan801d2c22021-06-02 21:25:05 -070029026 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029027 TEST_REQUIRES_X86_AVX;
29028 GemmMicrokernelTester()
29029 .mr(1)
29030 .nr(4)
29031 .kr(8)
29032 .sr(1)
29033 .m(1)
29034 .n(4)
29035 .k(8)
29036 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029037 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029038 }
29039#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
29040
29041
29042#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070029043 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029044 TEST_REQUIRES_X86_AVX;
29045 GemmMicrokernelTester()
29046 .mr(2)
29047 .nr(4)
29048 .kr(8)
29049 .sr(1)
29050 .m(2)
29051 .n(4)
29052 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080029053 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029054 }
29055
Marat Dukhan801d2c22021-06-02 21:25:05 -070029056 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029057 TEST_REQUIRES_X86_AVX;
29058 GemmMicrokernelTester()
29059 .mr(2)
29060 .nr(4)
29061 .kr(8)
29062 .sr(1)
29063 .m(2)
29064 .n(4)
29065 .k(8)
29066 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029067 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029068 }
29069
Marat Dukhan801d2c22021-06-02 21:25:05 -070029070 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029071 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080029072 for (uint32_t n = 1; n <= 4; n++) {
29073 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029074 GemmMicrokernelTester()
29075 .mr(2)
29076 .nr(4)
29077 .kr(8)
29078 .sr(1)
29079 .m(m)
29080 .n(n)
29081 .k(8)
29082 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029083 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029084 }
29085 }
29086 }
29087
Marat Dukhan801d2c22021-06-02 21:25:05 -070029088 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029089 TEST_REQUIRES_X86_AVX;
29090 for (uint32_t m = 1; m <= 2; m++) {
29091 GemmMicrokernelTester()
29092 .mr(2)
29093 .nr(4)
29094 .kr(8)
29095 .sr(1)
29096 .m(m)
29097 .n(4)
29098 .k(8)
29099 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029100 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029101 }
29102 }
29103
Marat Dukhan801d2c22021-06-02 21:25:05 -070029104 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029105 TEST_REQUIRES_X86_AVX;
29106 for (uint32_t n = 1; n <= 4; n++) {
29107 GemmMicrokernelTester()
29108 .mr(2)
29109 .nr(4)
29110 .kr(8)
29111 .sr(1)
29112 .m(2)
29113 .n(n)
29114 .k(8)
29115 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029116 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029117 }
29118 }
29119
Marat Dukhan801d2c22021-06-02 21:25:05 -070029120 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029121 TEST_REQUIRES_X86_AVX;
29122 for (size_t k = 1; k < 8; k++) {
29123 GemmMicrokernelTester()
29124 .mr(2)
29125 .nr(4)
29126 .kr(8)
29127 .sr(1)
29128 .m(2)
29129 .n(4)
29130 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029131 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029132 }
29133 }
29134
Marat Dukhan801d2c22021-06-02 21:25:05 -070029135 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029136 TEST_REQUIRES_X86_AVX;
29137 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029138 for (uint32_t n = 1; n <= 4; n++) {
29139 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029140 GemmMicrokernelTester()
29141 .mr(2)
29142 .nr(4)
29143 .kr(8)
29144 .sr(1)
29145 .m(m)
29146 .n(n)
29147 .k(k)
29148 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029149 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029150 }
29151 }
29152 }
29153 }
29154
Marat Dukhan801d2c22021-06-02 21:25:05 -070029155 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029156 TEST_REQUIRES_X86_AVX;
29157 for (size_t k = 9; k < 16; k++) {
29158 GemmMicrokernelTester()
29159 .mr(2)
29160 .nr(4)
29161 .kr(8)
29162 .sr(1)
29163 .m(2)
29164 .n(4)
29165 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029166 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029167 }
29168 }
29169
Marat Dukhan801d2c22021-06-02 21:25:05 -070029170 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029171 TEST_REQUIRES_X86_AVX;
29172 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029173 for (uint32_t n = 1; n <= 4; n++) {
29174 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029175 GemmMicrokernelTester()
29176 .mr(2)
29177 .nr(4)
29178 .kr(8)
29179 .sr(1)
29180 .m(m)
29181 .n(n)
29182 .k(k)
29183 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029184 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029185 }
29186 }
29187 }
29188 }
29189
Marat Dukhan801d2c22021-06-02 21:25:05 -070029190 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029191 TEST_REQUIRES_X86_AVX;
29192 for (size_t k = 16; k <= 80; k += 8) {
29193 GemmMicrokernelTester()
29194 .mr(2)
29195 .nr(4)
29196 .kr(8)
29197 .sr(1)
29198 .m(2)
29199 .n(4)
29200 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029201 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029202 }
29203 }
29204
Marat Dukhan801d2c22021-06-02 21:25:05 -070029205 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029206 TEST_REQUIRES_X86_AVX;
29207 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029208 for (uint32_t n = 1; n <= 4; n++) {
29209 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029210 GemmMicrokernelTester()
29211 .mr(2)
29212 .nr(4)
29213 .kr(8)
29214 .sr(1)
29215 .m(m)
29216 .n(n)
29217 .k(k)
29218 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029219 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029220 }
29221 }
29222 }
29223 }
29224
Marat Dukhan801d2c22021-06-02 21:25:05 -070029225 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029226 TEST_REQUIRES_X86_AVX;
29227 for (uint32_t n = 5; n < 8; n++) {
29228 for (size_t k = 1; k <= 40; k += 9) {
29229 GemmMicrokernelTester()
29230 .mr(2)
29231 .nr(4)
29232 .kr(8)
29233 .sr(1)
29234 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029235 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070029236 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029237 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029238 }
29239 }
29240 }
29241
Marat Dukhan801d2c22021-06-02 21:25:05 -070029242 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029243 TEST_REQUIRES_X86_AVX;
29244 for (uint32_t n = 5; n < 8; n++) {
29245 for (size_t k = 1; k <= 40; k += 9) {
29246 GemmMicrokernelTester()
29247 .mr(2)
29248 .nr(4)
29249 .kr(8)
29250 .sr(1)
29251 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029252 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070029253 .k(k)
29254 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029255 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029256 }
29257 }
29258 }
29259
Marat Dukhan801d2c22021-06-02 21:25:05 -070029260 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029261 TEST_REQUIRES_X86_AVX;
29262 for (uint32_t n = 5; n < 8; n++) {
29263 for (size_t k = 1; k <= 40; k += 9) {
29264 for (uint32_t m = 1; m <= 2; m++) {
29265 GemmMicrokernelTester()
29266 .mr(2)
29267 .nr(4)
29268 .kr(8)
29269 .sr(1)
29270 .m(m)
29271 .n(n)
29272 .k(k)
29273 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029274 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029275 }
29276 }
29277 }
29278 }
29279
Marat Dukhan801d2c22021-06-02 21:25:05 -070029280 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029281 TEST_REQUIRES_X86_AVX;
29282 for (uint32_t n = 8; n <= 12; n += 4) {
29283 for (size_t k = 1; k <= 40; k += 9) {
29284 GemmMicrokernelTester()
29285 .mr(2)
29286 .nr(4)
29287 .kr(8)
29288 .sr(1)
29289 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029290 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070029291 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029292 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029293 }
29294 }
29295 }
29296
Marat Dukhan801d2c22021-06-02 21:25:05 -070029297 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029298 TEST_REQUIRES_X86_AVX;
29299 for (uint32_t n = 8; n <= 12; n += 4) {
29300 for (size_t k = 1; k <= 40; k += 9) {
29301 GemmMicrokernelTester()
29302 .mr(2)
29303 .nr(4)
29304 .kr(8)
29305 .sr(1)
29306 .m(2)
29307 .n(n)
29308 .k(k)
29309 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029310 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029311 }
29312 }
29313 }
29314
Marat Dukhan801d2c22021-06-02 21:25:05 -070029315 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029316 TEST_REQUIRES_X86_AVX;
29317 for (uint32_t n = 8; n <= 12; n += 4) {
29318 for (size_t k = 1; k <= 40; k += 9) {
29319 for (uint32_t m = 1; m <= 2; m++) {
29320 GemmMicrokernelTester()
29321 .mr(2)
29322 .nr(4)
29323 .kr(8)
29324 .sr(1)
29325 .m(m)
29326 .n(n)
29327 .k(k)
29328 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029329 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029330 }
29331 }
29332 }
29333 }
29334
Marat Dukhan801d2c22021-06-02 21:25:05 -070029335 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, small_kernel) {
29336 TEST_REQUIRES_X86_AVX;
29337 for (size_t k = 1; k <= 40; k += 9) {
29338 GemmMicrokernelTester()
29339 .mr(2)
29340 .nr(4)
29341 .kr(8)
29342 .sr(1)
29343 .m(2)
29344 .n(4)
29345 .k(k)
29346 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080029347 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070029348 }
29349 }
29350
29351 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, small_kernel_subtile) {
29352 TEST_REQUIRES_X86_AVX;
29353 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029354 for (uint32_t n = 1; n <= 4; n++) {
29355 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070029356 GemmMicrokernelTester()
29357 .mr(2)
29358 .nr(4)
29359 .kr(8)
29360 .sr(1)
29361 .m(m)
29362 .n(n)
29363 .k(k)
29364 .ks(3)
29365 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029366 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070029367 }
29368 }
29369 }
29370 }
29371
29372 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_small_kernel) {
29373 TEST_REQUIRES_X86_AVX;
29374 for (uint32_t n = 5; n < 8; n++) {
29375 for (size_t k = 1; k <= 40; k += 9) {
29376 GemmMicrokernelTester()
29377 .mr(2)
29378 .nr(4)
29379 .kr(8)
29380 .sr(1)
29381 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029382 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070029383 .k(k)
29384 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080029385 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070029386 }
29387 }
29388 }
29389
29390 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_small_kernel) {
29391 TEST_REQUIRES_X86_AVX;
29392 for (uint32_t n = 8; n <= 12; n += 4) {
29393 for (size_t k = 1; k <= 40; k += 9) {
29394 GemmMicrokernelTester()
29395 .mr(2)
29396 .nr(4)
29397 .kr(8)
29398 .sr(1)
29399 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029400 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070029401 .k(k)
29402 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080029403 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070029404 }
29405 }
29406 }
29407
29408 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029409 TEST_REQUIRES_X86_AVX;
29410 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029411 for (uint32_t n = 1; n <= 4; n++) {
29412 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029413 GemmMicrokernelTester()
29414 .mr(2)
29415 .nr(4)
29416 .kr(8)
29417 .sr(1)
29418 .m(m)
29419 .n(n)
29420 .k(k)
29421 .cm_stride(7)
29422 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029423 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029424 }
29425 }
29426 }
29427 }
29428
Marat Dukhan801d2c22021-06-02 21:25:05 -070029429 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, a_offset) {
29430 TEST_REQUIRES_X86_AVX;
29431 for (size_t k = 1; k <= 40; k += 9) {
29432 GemmMicrokernelTester()
29433 .mr(2)
29434 .nr(4)
29435 .kr(8)
29436 .sr(1)
29437 .m(2)
29438 .n(4)
29439 .k(k)
29440 .ks(3)
29441 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080029442 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070029443 }
29444 }
29445
29446 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, zero) {
29447 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080029448 for (size_t k = 1; k <= 40; k += 9) {
29449 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070029450 GemmMicrokernelTester()
29451 .mr(2)
29452 .nr(4)
29453 .kr(8)
29454 .sr(1)
29455 .m(2)
29456 .n(4)
29457 .k(k)
29458 .ks(3)
29459 .a_offset(83)
29460 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080029461 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070029462 }
29463 }
29464 }
29465
29466 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029467 TEST_REQUIRES_X86_AVX;
29468 GemmMicrokernelTester()
29469 .mr(2)
29470 .nr(4)
29471 .kr(8)
29472 .sr(1)
29473 .m(2)
29474 .n(4)
29475 .k(8)
29476 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080029477 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029478 }
29479
Marat Dukhan801d2c22021-06-02 21:25:05 -070029480 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029481 TEST_REQUIRES_X86_AVX;
29482 GemmMicrokernelTester()
29483 .mr(2)
29484 .nr(4)
29485 .kr(8)
29486 .sr(1)
29487 .m(2)
29488 .n(4)
29489 .k(8)
29490 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080029491 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029492 }
29493
Marat Dukhan801d2c22021-06-02 21:25:05 -070029494 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029495 TEST_REQUIRES_X86_AVX;
29496 GemmMicrokernelTester()
29497 .mr(2)
29498 .nr(4)
29499 .kr(8)
29500 .sr(1)
29501 .m(2)
29502 .n(4)
29503 .k(8)
29504 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029505 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029506 }
29507#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
29508
29509
29510#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan801d2c22021-06-02 21:25:05 -070029511 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029512 TEST_REQUIRES_X86_XOP;
29513 GemmMicrokernelTester()
29514 .mr(3)
29515 .nr(4)
29516 .kr(8)
29517 .sr(1)
29518 .m(3)
29519 .n(4)
29520 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080029521 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029522 }
29523
Marat Dukhan801d2c22021-06-02 21:25:05 -070029524 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029525 TEST_REQUIRES_X86_XOP;
29526 GemmMicrokernelTester()
29527 .mr(3)
29528 .nr(4)
29529 .kr(8)
29530 .sr(1)
29531 .m(3)
29532 .n(4)
29533 .k(8)
29534 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029535 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029536 }
29537
Marat Dukhan801d2c22021-06-02 21:25:05 -070029538 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029539 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080029540 for (uint32_t n = 1; n <= 4; n++) {
29541 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029542 GemmMicrokernelTester()
29543 .mr(3)
29544 .nr(4)
29545 .kr(8)
29546 .sr(1)
29547 .m(m)
29548 .n(n)
29549 .k(8)
29550 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029551 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029552 }
29553 }
29554 }
29555
Marat Dukhan801d2c22021-06-02 21:25:05 -070029556 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile_m) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029557 TEST_REQUIRES_X86_XOP;
29558 for (uint32_t m = 1; m <= 3; m++) {
29559 GemmMicrokernelTester()
29560 .mr(3)
29561 .nr(4)
29562 .kr(8)
29563 .sr(1)
29564 .m(m)
29565 .n(4)
29566 .k(8)
29567 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029568 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029569 }
29570 }
29571
Marat Dukhan801d2c22021-06-02 21:25:05 -070029572 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile_n) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029573 TEST_REQUIRES_X86_XOP;
29574 for (uint32_t n = 1; n <= 4; n++) {
29575 GemmMicrokernelTester()
29576 .mr(3)
29577 .nr(4)
29578 .kr(8)
29579 .sr(1)
29580 .m(3)
29581 .n(n)
29582 .k(8)
29583 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029584 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029585 }
29586 }
29587
Marat Dukhan801d2c22021-06-02 21:25:05 -070029588 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_lt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029589 TEST_REQUIRES_X86_XOP;
29590 for (size_t k = 1; k < 8; k++) {
29591 GemmMicrokernelTester()
29592 .mr(3)
29593 .nr(4)
29594 .kr(8)
29595 .sr(1)
29596 .m(3)
29597 .n(4)
29598 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029599 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029600 }
29601 }
29602
Marat Dukhan801d2c22021-06-02 21:25:05 -070029603 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_lt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029604 TEST_REQUIRES_X86_XOP;
29605 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029606 for (uint32_t n = 1; n <= 4; n++) {
29607 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029608 GemmMicrokernelTester()
29609 .mr(3)
29610 .nr(4)
29611 .kr(8)
29612 .sr(1)
29613 .m(m)
29614 .n(n)
29615 .k(k)
29616 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029617 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029618 }
29619 }
29620 }
29621 }
29622
Marat Dukhan801d2c22021-06-02 21:25:05 -070029623 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_gt_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029624 TEST_REQUIRES_X86_XOP;
29625 for (size_t k = 9; k < 16; k++) {
29626 GemmMicrokernelTester()
29627 .mr(3)
29628 .nr(4)
29629 .kr(8)
29630 .sr(1)
29631 .m(3)
29632 .n(4)
29633 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029634 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029635 }
29636 }
29637
Marat Dukhan801d2c22021-06-02 21:25:05 -070029638 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_gt_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029639 TEST_REQUIRES_X86_XOP;
29640 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029641 for (uint32_t n = 1; n <= 4; n++) {
29642 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029643 GemmMicrokernelTester()
29644 .mr(3)
29645 .nr(4)
29646 .kr(8)
29647 .sr(1)
29648 .m(m)
29649 .n(n)
29650 .k(k)
29651 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029652 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029653 }
29654 }
29655 }
29656 }
29657
Marat Dukhan801d2c22021-06-02 21:25:05 -070029658 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_div_8) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029659 TEST_REQUIRES_X86_XOP;
29660 for (size_t k = 16; k <= 80; k += 8) {
29661 GemmMicrokernelTester()
29662 .mr(3)
29663 .nr(4)
29664 .kr(8)
29665 .sr(1)
29666 .m(3)
29667 .n(4)
29668 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029669 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029670 }
29671 }
29672
Marat Dukhan801d2c22021-06-02 21:25:05 -070029673 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_div_8_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029674 TEST_REQUIRES_X86_XOP;
29675 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029676 for (uint32_t n = 1; n <= 4; n++) {
29677 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029678 GemmMicrokernelTester()
29679 .mr(3)
29680 .nr(4)
29681 .kr(8)
29682 .sr(1)
29683 .m(m)
29684 .n(n)
29685 .k(k)
29686 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029687 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029688 }
29689 }
29690 }
29691 }
29692
Marat Dukhan801d2c22021-06-02 21:25:05 -070029693 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029694 TEST_REQUIRES_X86_XOP;
29695 for (uint32_t n = 5; n < 8; n++) {
29696 for (size_t k = 1; k <= 40; k += 9) {
29697 GemmMicrokernelTester()
29698 .mr(3)
29699 .nr(4)
29700 .kr(8)
29701 .sr(1)
29702 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029703 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070029704 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029705 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029706 }
29707 }
29708 }
29709
Marat Dukhan801d2c22021-06-02 21:25:05 -070029710 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029711 TEST_REQUIRES_X86_XOP;
29712 for (uint32_t n = 5; n < 8; n++) {
29713 for (size_t k = 1; k <= 40; k += 9) {
29714 GemmMicrokernelTester()
29715 .mr(3)
29716 .nr(4)
29717 .kr(8)
29718 .sr(1)
29719 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029720 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070029721 .k(k)
29722 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029723 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029724 }
29725 }
29726 }
29727
Marat Dukhan801d2c22021-06-02 21:25:05 -070029728 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029729 TEST_REQUIRES_X86_XOP;
29730 for (uint32_t n = 5; n < 8; n++) {
29731 for (size_t k = 1; k <= 40; k += 9) {
29732 for (uint32_t m = 1; m <= 3; m++) {
29733 GemmMicrokernelTester()
29734 .mr(3)
29735 .nr(4)
29736 .kr(8)
29737 .sr(1)
29738 .m(m)
29739 .n(n)
29740 .k(k)
29741 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029742 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029743 }
29744 }
29745 }
29746 }
29747
Marat Dukhan801d2c22021-06-02 21:25:05 -070029748 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029749 TEST_REQUIRES_X86_XOP;
29750 for (uint32_t n = 8; n <= 12; n += 4) {
29751 for (size_t k = 1; k <= 40; k += 9) {
29752 GemmMicrokernelTester()
29753 .mr(3)
29754 .nr(4)
29755 .kr(8)
29756 .sr(1)
29757 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029758 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070029759 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029760 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029761 }
29762 }
29763 }
29764
Marat Dukhan801d2c22021-06-02 21:25:05 -070029765 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_strided_cn) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029766 TEST_REQUIRES_X86_XOP;
29767 for (uint32_t n = 8; n <= 12; n += 4) {
29768 for (size_t k = 1; k <= 40; k += 9) {
29769 GemmMicrokernelTester()
29770 .mr(3)
29771 .nr(4)
29772 .kr(8)
29773 .sr(1)
29774 .m(3)
29775 .n(n)
29776 .k(k)
29777 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029778 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029779 }
29780 }
29781 }
29782
Marat Dukhan801d2c22021-06-02 21:25:05 -070029783 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029784 TEST_REQUIRES_X86_XOP;
29785 for (uint32_t n = 8; n <= 12; n += 4) {
29786 for (size_t k = 1; k <= 40; k += 9) {
29787 for (uint32_t m = 1; m <= 3; m++) {
29788 GemmMicrokernelTester()
29789 .mr(3)
29790 .nr(4)
29791 .kr(8)
29792 .sr(1)
29793 .m(m)
29794 .n(n)
29795 .k(k)
29796 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029797 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029798 }
29799 }
29800 }
29801 }
29802
Marat Dukhan801d2c22021-06-02 21:25:05 -070029803 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, small_kernel) {
29804 TEST_REQUIRES_X86_XOP;
29805 for (size_t k = 1; k <= 40; k += 9) {
29806 GemmMicrokernelTester()
29807 .mr(3)
29808 .nr(4)
29809 .kr(8)
29810 .sr(1)
29811 .m(3)
29812 .n(4)
29813 .k(k)
29814 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080029815 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070029816 }
29817 }
29818
29819 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, small_kernel_subtile) {
29820 TEST_REQUIRES_X86_XOP;
29821 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029822 for (uint32_t n = 1; n <= 4; n++) {
29823 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070029824 GemmMicrokernelTester()
29825 .mr(3)
29826 .nr(4)
29827 .kr(8)
29828 .sr(1)
29829 .m(m)
29830 .n(n)
29831 .k(k)
29832 .ks(3)
29833 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029834 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070029835 }
29836 }
29837 }
29838 }
29839
29840 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_small_kernel) {
29841 TEST_REQUIRES_X86_XOP;
29842 for (uint32_t n = 5; n < 8; n++) {
29843 for (size_t k = 1; k <= 40; k += 9) {
29844 GemmMicrokernelTester()
29845 .mr(3)
29846 .nr(4)
29847 .kr(8)
29848 .sr(1)
29849 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029850 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070029851 .k(k)
29852 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080029853 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070029854 }
29855 }
29856 }
29857
29858 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_small_kernel) {
29859 TEST_REQUIRES_X86_XOP;
29860 for (uint32_t n = 8; n <= 12; n += 4) {
29861 for (size_t k = 1; k <= 40; k += 9) {
29862 GemmMicrokernelTester()
29863 .mr(3)
29864 .nr(4)
29865 .kr(8)
29866 .sr(1)
29867 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029868 .n(n)
Marat Dukhan801d2c22021-06-02 21:25:05 -070029869 .k(k)
29870 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080029871 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070029872 }
29873 }
29874 }
29875
29876 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cm_subtile) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029877 TEST_REQUIRES_X86_XOP;
29878 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029879 for (uint32_t n = 1; n <= 4; n++) {
29880 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029881 GemmMicrokernelTester()
29882 .mr(3)
29883 .nr(4)
29884 .kr(8)
29885 .sr(1)
29886 .m(m)
29887 .n(n)
29888 .k(k)
29889 .cm_stride(7)
29890 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029891 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029892 }
29893 }
29894 }
29895 }
29896
Marat Dukhan801d2c22021-06-02 21:25:05 -070029897 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, a_offset) {
29898 TEST_REQUIRES_X86_XOP;
29899 for (size_t k = 1; k <= 40; k += 9) {
29900 GemmMicrokernelTester()
29901 .mr(3)
29902 .nr(4)
29903 .kr(8)
29904 .sr(1)
29905 .m(3)
29906 .n(4)
29907 .k(k)
29908 .ks(3)
29909 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080029910 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070029911 }
29912 }
29913
29914 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, zero) {
29915 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080029916 for (size_t k = 1; k <= 40; k += 9) {
29917 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan801d2c22021-06-02 21:25:05 -070029918 GemmMicrokernelTester()
29919 .mr(3)
29920 .nr(4)
29921 .kr(8)
29922 .sr(1)
29923 .m(3)
29924 .n(4)
29925 .k(k)
29926 .ks(3)
29927 .a_offset(127)
29928 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080029929 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan801d2c22021-06-02 21:25:05 -070029930 }
29931 }
29932 }
29933
29934 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, qmin) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029935 TEST_REQUIRES_X86_XOP;
29936 GemmMicrokernelTester()
29937 .mr(3)
29938 .nr(4)
29939 .kr(8)
29940 .sr(1)
29941 .m(3)
29942 .n(4)
29943 .k(8)
29944 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080029945 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029946 }
29947
Marat Dukhan801d2c22021-06-02 21:25:05 -070029948 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, qmax) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029949 TEST_REQUIRES_X86_XOP;
29950 GemmMicrokernelTester()
29951 .mr(3)
29952 .nr(4)
29953 .kr(8)
29954 .sr(1)
29955 .m(3)
29956 .n(4)
29957 .k(8)
29958 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080029959 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029960 }
29961
Marat Dukhan801d2c22021-06-02 21:25:05 -070029962 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cm) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070029963 TEST_REQUIRES_X86_XOP;
29964 GemmMicrokernelTester()
29965 .mr(3)
29966 .nr(4)
29967 .kr(8)
29968 .sr(1)
29969 .m(3)
29970 .n(4)
29971 .k(8)
29972 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029973 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070029974 }
29975#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
29976
29977
29978#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan9b474cf2021-05-25 16:37:48 -070029979 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8) {
29980 TEST_REQUIRES_X86_AVX2;
29981 GemmMicrokernelTester()
29982 .mr(1)
29983 .nr(8)
29984 .kr(8)
29985 .sr(1)
29986 .m(1)
29987 .n(8)
29988 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080029989 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070029990 }
29991
29992 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cn) {
29993 TEST_REQUIRES_X86_AVX2;
29994 GemmMicrokernelTester()
29995 .mr(1)
29996 .nr(8)
29997 .kr(8)
29998 .sr(1)
29999 .m(1)
30000 .n(8)
30001 .k(8)
30002 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080030003 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030004 }
30005
30006 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile) {
30007 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080030008 for (uint32_t n = 1; n <= 8; n++) {
30009 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030010 GemmMicrokernelTester()
30011 .mr(1)
30012 .nr(8)
30013 .kr(8)
30014 .sr(1)
30015 .m(m)
30016 .n(n)
30017 .k(8)
30018 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030019 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030020 }
30021 }
30022 }
30023
30024 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_m) {
30025 TEST_REQUIRES_X86_AVX2;
30026 for (uint32_t m = 1; m <= 1; m++) {
30027 GemmMicrokernelTester()
30028 .mr(1)
30029 .nr(8)
30030 .kr(8)
30031 .sr(1)
30032 .m(m)
30033 .n(8)
30034 .k(8)
30035 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030036 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030037 }
30038 }
30039
30040 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_n) {
30041 TEST_REQUIRES_X86_AVX2;
30042 for (uint32_t n = 1; n <= 8; n++) {
30043 GemmMicrokernelTester()
30044 .mr(1)
30045 .nr(8)
30046 .kr(8)
30047 .sr(1)
30048 .m(1)
30049 .n(n)
30050 .k(8)
30051 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030052 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030053 }
30054 }
30055
30056 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8) {
30057 TEST_REQUIRES_X86_AVX2;
30058 for (size_t k = 1; k < 8; k++) {
30059 GemmMicrokernelTester()
30060 .mr(1)
30061 .nr(8)
30062 .kr(8)
30063 .sr(1)
30064 .m(1)
30065 .n(8)
30066 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030067 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030068 }
30069 }
30070
30071 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8_subtile) {
30072 TEST_REQUIRES_X86_AVX2;
30073 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030074 for (uint32_t n = 1; n <= 8; n++) {
30075 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030076 GemmMicrokernelTester()
30077 .mr(1)
30078 .nr(8)
30079 .kr(8)
30080 .sr(1)
30081 .m(m)
30082 .n(n)
30083 .k(k)
30084 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030085 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030086 }
30087 }
30088 }
30089 }
30090
30091 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8) {
30092 TEST_REQUIRES_X86_AVX2;
30093 for (size_t k = 9; k < 16; k++) {
30094 GemmMicrokernelTester()
30095 .mr(1)
30096 .nr(8)
30097 .kr(8)
30098 .sr(1)
30099 .m(1)
30100 .n(8)
30101 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030102 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030103 }
30104 }
30105
30106 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8_subtile) {
30107 TEST_REQUIRES_X86_AVX2;
30108 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030109 for (uint32_t n = 1; n <= 8; n++) {
30110 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030111 GemmMicrokernelTester()
30112 .mr(1)
30113 .nr(8)
30114 .kr(8)
30115 .sr(1)
30116 .m(m)
30117 .n(n)
30118 .k(k)
30119 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030120 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030121 }
30122 }
30123 }
30124 }
30125
30126 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8) {
30127 TEST_REQUIRES_X86_AVX2;
30128 for (size_t k = 16; k <= 80; k += 8) {
30129 GemmMicrokernelTester()
30130 .mr(1)
30131 .nr(8)
30132 .kr(8)
30133 .sr(1)
30134 .m(1)
30135 .n(8)
30136 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030137 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030138 }
30139 }
30140
30141 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8_subtile) {
30142 TEST_REQUIRES_X86_AVX2;
30143 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030144 for (uint32_t n = 1; n <= 8; n++) {
30145 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030146 GemmMicrokernelTester()
30147 .mr(1)
30148 .nr(8)
30149 .kr(8)
30150 .sr(1)
30151 .m(m)
30152 .n(n)
30153 .k(k)
30154 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030155 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030156 }
30157 }
30158 }
30159 }
30160
30161 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8) {
30162 TEST_REQUIRES_X86_AVX2;
30163 for (uint32_t n = 9; n < 16; n++) {
30164 for (size_t k = 1; k <= 40; k += 9) {
30165 GemmMicrokernelTester()
30166 .mr(1)
30167 .nr(8)
30168 .kr(8)
30169 .sr(1)
30170 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030171 .n(n)
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030172 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030173 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030174 }
30175 }
30176 }
30177
30178 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_strided_cn) {
30179 TEST_REQUIRES_X86_AVX2;
30180 for (uint32_t n = 9; n < 16; n++) {
30181 for (size_t k = 1; k <= 40; k += 9) {
30182 GemmMicrokernelTester()
30183 .mr(1)
30184 .nr(8)
30185 .kr(8)
30186 .sr(1)
30187 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030188 .n(n)
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030189 .k(k)
30190 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080030191 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030192 }
30193 }
30194 }
30195
30196 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_subtile) {
30197 TEST_REQUIRES_X86_AVX2;
30198 for (uint32_t n = 9; n < 16; n++) {
30199 for (size_t k = 1; k <= 40; k += 9) {
30200 for (uint32_t m = 1; m <= 1; m++) {
30201 GemmMicrokernelTester()
30202 .mr(1)
30203 .nr(8)
30204 .kr(8)
30205 .sr(1)
30206 .m(m)
30207 .n(n)
30208 .k(k)
30209 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030210 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030211 }
30212 }
30213 }
30214 }
30215
30216 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8) {
30217 TEST_REQUIRES_X86_AVX2;
30218 for (uint32_t n = 16; n <= 24; n += 8) {
30219 for (size_t k = 1; k <= 40; k += 9) {
30220 GemmMicrokernelTester()
30221 .mr(1)
30222 .nr(8)
30223 .kr(8)
30224 .sr(1)
30225 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030226 .n(n)
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030227 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030228 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030229 }
30230 }
30231 }
30232
30233 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_strided_cn) {
30234 TEST_REQUIRES_X86_AVX2;
30235 for (uint32_t n = 16; n <= 24; n += 8) {
30236 for (size_t k = 1; k <= 40; k += 9) {
30237 GemmMicrokernelTester()
30238 .mr(1)
30239 .nr(8)
30240 .kr(8)
30241 .sr(1)
30242 .m(1)
30243 .n(n)
30244 .k(k)
30245 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080030246 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030247 }
30248 }
30249 }
30250
30251 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_subtile) {
30252 TEST_REQUIRES_X86_AVX2;
30253 for (uint32_t n = 16; n <= 24; n += 8) {
30254 for (size_t k = 1; k <= 40; k += 9) {
30255 for (uint32_t m = 1; m <= 1; m++) {
30256 GemmMicrokernelTester()
30257 .mr(1)
30258 .nr(8)
30259 .kr(8)
30260 .sr(1)
30261 .m(m)
30262 .n(n)
30263 .k(k)
30264 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030265 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030266 }
30267 }
30268 }
30269 }
30270
30271 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, small_kernel) {
30272 TEST_REQUIRES_X86_AVX2;
30273 for (size_t k = 1; k <= 40; k += 9) {
30274 GemmMicrokernelTester()
30275 .mr(1)
30276 .nr(8)
30277 .kr(8)
30278 .sr(1)
30279 .m(1)
30280 .n(8)
30281 .k(k)
30282 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080030283 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030284 }
30285 }
30286
30287 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, small_kernel_subtile) {
30288 TEST_REQUIRES_X86_AVX2;
30289 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030290 for (uint32_t n = 1; n <= 8; n++) {
30291 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030292 GemmMicrokernelTester()
30293 .mr(1)
30294 .nr(8)
30295 .kr(8)
30296 .sr(1)
30297 .m(m)
30298 .n(n)
30299 .k(k)
30300 .ks(3)
30301 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030302 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030303 }
30304 }
30305 }
30306 }
30307
30308 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_small_kernel) {
30309 TEST_REQUIRES_X86_AVX2;
30310 for (uint32_t n = 9; n < 16; n++) {
30311 for (size_t k = 1; k <= 40; k += 9) {
30312 GemmMicrokernelTester()
30313 .mr(1)
30314 .nr(8)
30315 .kr(8)
30316 .sr(1)
30317 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030318 .n(n)
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030319 .k(k)
30320 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080030321 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030322 }
30323 }
30324 }
30325
30326 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_small_kernel) {
30327 TEST_REQUIRES_X86_AVX2;
30328 for (uint32_t n = 16; n <= 24; n += 8) {
30329 for (size_t k = 1; k <= 40; k += 9) {
30330 GemmMicrokernelTester()
30331 .mr(1)
30332 .nr(8)
30333 .kr(8)
30334 .sr(1)
30335 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030336 .n(n)
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030337 .k(k)
30338 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080030339 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030340 }
30341 }
30342 }
30343
30344 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cm_subtile) {
30345 TEST_REQUIRES_X86_AVX2;
30346 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030347 for (uint32_t n = 1; n <= 8; n++) {
30348 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030349 GemmMicrokernelTester()
30350 .mr(1)
30351 .nr(8)
30352 .kr(8)
30353 .sr(1)
30354 .m(m)
30355 .n(n)
30356 .k(k)
30357 .cm_stride(11)
30358 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030359 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030360 }
30361 }
30362 }
30363 }
30364
30365 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, a_offset) {
30366 TEST_REQUIRES_X86_AVX2;
30367 for (size_t k = 1; k <= 40; k += 9) {
30368 GemmMicrokernelTester()
30369 .mr(1)
30370 .nr(8)
30371 .kr(8)
30372 .sr(1)
30373 .m(1)
30374 .n(8)
30375 .k(k)
30376 .ks(3)
30377 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080030378 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030379 }
30380 }
30381
30382 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, zero) {
30383 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080030384 for (size_t k = 1; k <= 40; k += 9) {
30385 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030386 GemmMicrokernelTester()
30387 .mr(1)
30388 .nr(8)
30389 .kr(8)
30390 .sr(1)
30391 .m(1)
30392 .n(8)
30393 .k(k)
30394 .ks(3)
30395 .a_offset(43)
30396 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080030397 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030398 }
30399 }
30400 }
30401
30402 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, qmin) {
30403 TEST_REQUIRES_X86_AVX2;
30404 GemmMicrokernelTester()
30405 .mr(1)
30406 .nr(8)
30407 .kr(8)
30408 .sr(1)
30409 .m(1)
30410 .n(8)
30411 .k(8)
30412 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080030413 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030414 }
30415
30416 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, qmax) {
30417 TEST_REQUIRES_X86_AVX2;
30418 GemmMicrokernelTester()
30419 .mr(1)
30420 .nr(8)
30421 .kr(8)
30422 .sr(1)
30423 .m(1)
30424 .n(8)
30425 .k(8)
30426 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080030427 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030428 }
30429
30430 TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cm) {
30431 TEST_REQUIRES_X86_AVX2;
30432 GemmMicrokernelTester()
30433 .mr(1)
30434 .nr(8)
30435 .kr(8)
30436 .sr(1)
30437 .m(1)
30438 .n(8)
30439 .k(8)
30440 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080030441 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070030442 }
30443#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
30444
30445
30446#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan71855ee2021-05-25 19:05:06 -070030447 TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8) {
30448 TEST_REQUIRES_X86_AVX512SKX;
30449 GemmMicrokernelTester()
30450 .mr(3)
30451 .nr(16)
30452 .kr(8)
30453 .sr(1)
30454 .m(3)
30455 .n(16)
30456 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080030457 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070030458 }
30459
30460 TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cn) {
30461 TEST_REQUIRES_X86_AVX512SKX;
30462 GemmMicrokernelTester()
30463 .mr(3)
30464 .nr(16)
30465 .kr(8)
30466 .sr(1)
30467 .m(3)
30468 .n(16)
30469 .k(8)
30470 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080030471 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070030472 }
30473
30474 TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile) {
30475 TEST_REQUIRES_X86_AVX512SKX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080030476 for (uint32_t n = 1; n <= 16; n++) {
30477 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan71855ee2021-05-25 19:05:06 -070030478 GemmMicrokernelTester()
30479 .mr(3)
30480 .nr(16)
30481 .kr(8)
30482 .sr(1)
30483 .m(m)
30484 .n(n)
30485 .k(8)
30486 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030487 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070030488 }
30489 }
30490 }
30491
30492 TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile_m) {
30493 TEST_REQUIRES_X86_AVX512SKX;
30494 for (uint32_t m = 1; m <= 3; m++) {
30495 GemmMicrokernelTester()
30496 .mr(3)
30497 .nr(16)
30498 .kr(8)
30499 .sr(1)
30500 .m(m)
30501 .n(16)
30502 .k(8)
30503 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030504 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070030505 }
30506 }
30507
30508 TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile_n) {
30509 TEST_REQUIRES_X86_AVX512SKX;
30510 for (uint32_t n = 1; n <= 16; n++) {
30511 GemmMicrokernelTester()
30512 .mr(3)
30513 .nr(16)
30514 .kr(8)
30515 .sr(1)
30516 .m(3)
30517 .n(n)
30518 .k(8)
30519 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030520 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070030521 }
30522 }
30523
30524 TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_lt_8) {
30525 TEST_REQUIRES_X86_AVX512SKX;
30526 for (size_t k = 1; k < 8; k++) {
30527 GemmMicrokernelTester()
30528 .mr(3)
30529 .nr(16)
30530 .kr(8)
30531 .sr(1)
30532 .m(3)
30533 .n(16)
30534 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030535 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070030536 }
30537 }
30538
30539 TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_lt_8_subtile) {
30540 TEST_REQUIRES_X86_AVX512SKX;
30541 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030542 for (uint32_t n = 1; n <= 16; n++) {
30543 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan71855ee2021-05-25 19:05:06 -070030544 GemmMicrokernelTester()
30545 .mr(3)
30546 .nr(16)
30547 .kr(8)
30548 .sr(1)
30549 .m(m)
30550 .n(n)
30551 .k(k)
30552 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030553 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070030554 }
30555 }
30556 }
30557 }
30558
30559 TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_gt_8) {
30560 TEST_REQUIRES_X86_AVX512SKX;
30561 for (size_t k = 9; k < 16; k++) {
30562 GemmMicrokernelTester()
30563 .mr(3)
30564 .nr(16)
30565 .kr(8)
30566 .sr(1)
30567 .m(3)
30568 .n(16)
30569 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030570 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070030571 }
30572 }
30573
30574 TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_gt_8_subtile) {
30575 TEST_REQUIRES_X86_AVX512SKX;
30576 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030577 for (uint32_t n = 1; n <= 16; n++) {
30578 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan71855ee2021-05-25 19:05:06 -070030579 GemmMicrokernelTester()
30580 .mr(3)
30581 .nr(16)
30582 .kr(8)
30583 .sr(1)
30584 .m(m)
30585 .n(n)
30586 .k(k)
30587 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030588 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070030589 }
30590 }
30591 }
30592 }
30593
30594 TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_div_8) {
30595 TEST_REQUIRES_X86_AVX512SKX;
30596 for (size_t k = 16; k <= 80; k += 8) {
30597 GemmMicrokernelTester()
30598 .mr(3)
30599 .nr(16)
30600 .kr(8)
30601 .sr(1)
30602 .m(3)
30603 .n(16)
30604 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030605 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070030606 }
30607 }
30608
30609 TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_div_8_subtile) {
30610 TEST_REQUIRES_X86_AVX512SKX;
30611 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030612 for (uint32_t n = 1; n <= 16; n++) {
30613 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan71855ee2021-05-25 19:05:06 -070030614 GemmMicrokernelTester()
30615 .mr(3)
30616 .nr(16)
30617 .kr(8)
30618 .sr(1)
30619 .m(m)
30620 .n(n)
30621 .k(k)
30622 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030623 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070030624 }
30625 }
30626 }
30627 }
30628
30629 TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16) {
30630 TEST_REQUIRES_X86_AVX512SKX;
30631 for (uint32_t n = 17; n < 32; n++) {
30632 for (size_t k = 1; k <= 40; k += 9) {
30633 GemmMicrokernelTester()
30634 .mr(3)
30635 .nr(16)
30636 .kr(8)
30637 .sr(1)
30638 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030639 .n(n)
Marat Dukhan71855ee2021-05-25 19:05:06 -070030640 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030641 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070030642 }
30643 }
30644 }
30645
30646 TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_strided_cn) {
30647 TEST_REQUIRES_X86_AVX512SKX;
30648 for (uint32_t n = 17; n < 32; n++) {
30649 for (size_t k = 1; k <= 40; k += 9) {
30650 GemmMicrokernelTester()
30651 .mr(3)
30652 .nr(16)
30653 .kr(8)
30654 .sr(1)
30655 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030656 .n(n)
Marat Dukhan71855ee2021-05-25 19:05:06 -070030657 .k(k)
30658 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080030659 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070030660 }
30661 }
30662 }
30663
30664 TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_subtile) {
30665 TEST_REQUIRES_X86_AVX512SKX;
30666 for (uint32_t n = 17; n < 32; n++) {
30667 for (size_t k = 1; k <= 40; k += 9) {
30668 for (uint32_t m = 1; m <= 3; m++) {
30669 GemmMicrokernelTester()
30670 .mr(3)
30671 .nr(16)
30672 .kr(8)
30673 .sr(1)
30674 .m(m)
30675 .n(n)
30676 .k(k)
30677 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030678 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070030679 }
30680 }
30681 }
30682 }
30683
30684 TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16) {
30685 TEST_REQUIRES_X86_AVX512SKX;
30686 for (uint32_t n = 32; n <= 48; n += 16) {
30687 for (size_t k = 1; k <= 40; k += 9) {
30688 GemmMicrokernelTester()
30689 .mr(3)
30690 .nr(16)
30691 .kr(8)
30692 .sr(1)
30693 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030694 .n(n)
Marat Dukhan71855ee2021-05-25 19:05:06 -070030695 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030696 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070030697 }
30698 }
30699 }
30700
30701 TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_strided_cn) {
30702 TEST_REQUIRES_X86_AVX512SKX;
30703 for (uint32_t n = 32; n <= 48; n += 16) {
30704 for (size_t k = 1; k <= 40; k += 9) {
30705 GemmMicrokernelTester()
30706 .mr(3)
30707 .nr(16)
30708 .kr(8)
30709 .sr(1)
30710 .m(3)
30711 .n(n)
30712 .k(k)
30713 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080030714 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070030715 }
30716 }
30717 }
30718
30719 TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_subtile) {
30720 TEST_REQUIRES_X86_AVX512SKX;
30721 for (uint32_t n = 32; n <= 48; n += 16) {
30722 for (size_t k = 1; k <= 40; k += 9) {
30723 for (uint32_t m = 1; m <= 3; m++) {
30724 GemmMicrokernelTester()
30725 .mr(3)
30726 .nr(16)
30727 .kr(8)
30728 .sr(1)
30729 .m(m)
30730 .n(n)
30731 .k(k)
30732 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030733 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070030734 }
30735 }
30736 }
30737 }
30738
30739 TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, small_kernel) {
30740 TEST_REQUIRES_X86_AVX512SKX;
30741 for (size_t k = 1; k <= 40; k += 9) {
30742 GemmMicrokernelTester()
30743 .mr(3)
30744 .nr(16)
30745 .kr(8)
30746 .sr(1)
30747 .m(3)
30748 .n(16)
30749 .k(k)
30750 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080030751 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070030752 }
30753 }
30754
30755 TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, small_kernel_subtile) {
30756 TEST_REQUIRES_X86_AVX512SKX;
30757 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030758 for (uint32_t n = 1; n <= 16; n++) {
30759 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan71855ee2021-05-25 19:05:06 -070030760 GemmMicrokernelTester()
30761 .mr(3)
30762 .nr(16)
30763 .kr(8)
30764 .sr(1)
30765 .m(m)
30766 .n(n)
30767 .k(k)
30768 .ks(3)
30769 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030770 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070030771 }
30772 }
30773 }
30774 }
30775
30776 TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_small_kernel) {
30777 TEST_REQUIRES_X86_AVX512SKX;
30778 for (uint32_t n = 17; n < 32; n++) {
30779 for (size_t k = 1; k <= 40; k += 9) {
30780 GemmMicrokernelTester()
30781 .mr(3)
30782 .nr(16)
30783 .kr(8)
30784 .sr(1)
30785 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030786 .n(n)
Marat Dukhan71855ee2021-05-25 19:05:06 -070030787 .k(k)
30788 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080030789 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070030790 }
30791 }
30792 }
30793
30794 TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_small_kernel) {
30795 TEST_REQUIRES_X86_AVX512SKX;
30796 for (uint32_t n = 32; n <= 48; n += 16) {
30797 for (size_t k = 1; k <= 40; k += 9) {
30798 GemmMicrokernelTester()
30799 .mr(3)
30800 .nr(16)
30801 .kr(8)
30802 .sr(1)
30803 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030804 .n(n)
Marat Dukhan71855ee2021-05-25 19:05:06 -070030805 .k(k)
30806 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080030807 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070030808 }
30809 }
30810 }
30811
30812 TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cm_subtile) {
30813 TEST_REQUIRES_X86_AVX512SKX;
30814 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030815 for (uint32_t n = 1; n <= 16; n++) {
30816 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan71855ee2021-05-25 19:05:06 -070030817 GemmMicrokernelTester()
30818 .mr(3)
30819 .nr(16)
30820 .kr(8)
30821 .sr(1)
30822 .m(m)
30823 .n(n)
30824 .k(k)
30825 .cm_stride(19)
30826 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030827 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070030828 }
30829 }
30830 }
30831 }
30832
30833 TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, a_offset) {
30834 TEST_REQUIRES_X86_AVX512SKX;
30835 for (size_t k = 1; k <= 40; k += 9) {
30836 GemmMicrokernelTester()
30837 .mr(3)
30838 .nr(16)
30839 .kr(8)
30840 .sr(1)
30841 .m(3)
30842 .n(16)
30843 .k(k)
30844 .ks(3)
30845 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080030846 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070030847 }
30848 }
30849
30850 TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, zero) {
30851 TEST_REQUIRES_X86_AVX512SKX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080030852 for (size_t k = 1; k <= 40; k += 9) {
30853 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan71855ee2021-05-25 19:05:06 -070030854 GemmMicrokernelTester()
30855 .mr(3)
30856 .nr(16)
30857 .kr(8)
30858 .sr(1)
30859 .m(3)
30860 .n(16)
30861 .k(k)
30862 .ks(3)
30863 .a_offset(127)
30864 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080030865 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070030866 }
30867 }
30868 }
30869
30870 TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, qmin) {
30871 TEST_REQUIRES_X86_AVX512SKX;
30872 GemmMicrokernelTester()
30873 .mr(3)
30874 .nr(16)
30875 .kr(8)
30876 .sr(1)
30877 .m(3)
30878 .n(16)
30879 .k(8)
30880 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080030881 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070030882 }
30883
30884 TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, qmax) {
30885 TEST_REQUIRES_X86_AVX512SKX;
30886 GemmMicrokernelTester()
30887 .mr(3)
30888 .nr(16)
30889 .kr(8)
30890 .sr(1)
30891 .m(3)
30892 .n(16)
30893 .k(8)
30894 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080030895 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070030896 }
30897
30898 TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cm) {
30899 TEST_REQUIRES_X86_AVX512SKX;
30900 GemmMicrokernelTester()
30901 .mr(3)
30902 .nr(16)
30903 .kr(8)
30904 .sr(1)
30905 .m(3)
30906 .n(16)
30907 .k(8)
30908 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080030909 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070030910 }
30911#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
30912
30913
Marat Dukhan4c617792021-12-21 15:47:58 -080030914#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070030915 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
30916 GemmMicrokernelTester()
30917 .mr(1)
30918 .nr(4)
30919 .kr(2)
30920 .sr(1)
30921 .m(1)
30922 .n(4)
30923 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080030924 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070030925 }
30926
30927 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
30928 GemmMicrokernelTester()
30929 .mr(1)
30930 .nr(4)
30931 .kr(2)
30932 .sr(1)
30933 .m(1)
30934 .n(4)
30935 .k(8)
30936 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080030937 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070030938 }
30939
30940 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030941 for (uint32_t n = 1; n <= 4; n++) {
30942 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070030943 GemmMicrokernelTester()
30944 .mr(1)
30945 .nr(4)
30946 .kr(2)
30947 .sr(1)
30948 .m(m)
30949 .n(n)
30950 .k(8)
30951 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030952 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070030953 }
30954 }
30955 }
30956
30957 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
30958 for (uint32_t m = 1; m <= 1; m++) {
30959 GemmMicrokernelTester()
30960 .mr(1)
30961 .nr(4)
30962 .kr(2)
30963 .sr(1)
30964 .m(m)
30965 .n(4)
30966 .k(8)
30967 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030968 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070030969 }
30970 }
30971
30972 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
30973 for (uint32_t n = 1; n <= 4; n++) {
30974 GemmMicrokernelTester()
30975 .mr(1)
30976 .nr(4)
30977 .kr(2)
30978 .sr(1)
30979 .m(1)
30980 .n(n)
30981 .k(8)
30982 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030983 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070030984 }
30985 }
30986
30987 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
30988 for (size_t k = 1; k < 8; k++) {
30989 GemmMicrokernelTester()
30990 .mr(1)
30991 .nr(4)
30992 .kr(2)
30993 .sr(1)
30994 .m(1)
30995 .n(4)
30996 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030997 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070030998 }
30999 }
31000
31001 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
31002 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031003 for (uint32_t n = 1; n <= 4; n++) {
31004 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031005 GemmMicrokernelTester()
31006 .mr(1)
31007 .nr(4)
31008 .kr(2)
31009 .sr(1)
31010 .m(m)
31011 .n(n)
31012 .k(k)
31013 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031014 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031015 }
31016 }
31017 }
31018 }
31019
31020 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
31021 for (size_t k = 9; k < 16; k++) {
31022 GemmMicrokernelTester()
31023 .mr(1)
31024 .nr(4)
31025 .kr(2)
31026 .sr(1)
31027 .m(1)
31028 .n(4)
31029 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031030 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031031 }
31032 }
31033
31034 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
31035 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031036 for (uint32_t n = 1; n <= 4; n++) {
31037 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031038 GemmMicrokernelTester()
31039 .mr(1)
31040 .nr(4)
31041 .kr(2)
31042 .sr(1)
31043 .m(m)
31044 .n(n)
31045 .k(k)
31046 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031047 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031048 }
31049 }
31050 }
31051 }
31052
31053 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
31054 for (size_t k = 16; k <= 80; k += 8) {
31055 GemmMicrokernelTester()
31056 .mr(1)
31057 .nr(4)
31058 .kr(2)
31059 .sr(1)
31060 .m(1)
31061 .n(4)
31062 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031063 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031064 }
31065 }
31066
31067 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
31068 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031069 for (uint32_t n = 1; n <= 4; n++) {
31070 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031071 GemmMicrokernelTester()
31072 .mr(1)
31073 .nr(4)
31074 .kr(2)
31075 .sr(1)
31076 .m(m)
31077 .n(n)
31078 .k(k)
31079 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031080 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031081 }
31082 }
31083 }
31084 }
31085
31086 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
31087 for (uint32_t n = 5; n < 8; n++) {
31088 for (size_t k = 1; k <= 40; k += 9) {
31089 GemmMicrokernelTester()
31090 .mr(1)
31091 .nr(4)
31092 .kr(2)
31093 .sr(1)
31094 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031095 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031096 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031097 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031098 }
31099 }
31100 }
31101
31102 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
31103 for (uint32_t n = 5; n < 8; n++) {
31104 for (size_t k = 1; k <= 40; k += 9) {
31105 GemmMicrokernelTester()
31106 .mr(1)
31107 .nr(4)
31108 .kr(2)
31109 .sr(1)
31110 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031111 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031112 .k(k)
31113 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031114 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031115 }
31116 }
31117 }
31118
31119 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
31120 for (uint32_t n = 5; n < 8; n++) {
31121 for (size_t k = 1; k <= 40; k += 9) {
31122 for (uint32_t m = 1; m <= 1; m++) {
31123 GemmMicrokernelTester()
31124 .mr(1)
31125 .nr(4)
31126 .kr(2)
31127 .sr(1)
31128 .m(m)
31129 .n(n)
31130 .k(k)
31131 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031132 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031133 }
31134 }
31135 }
31136 }
31137
31138 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
31139 for (uint32_t n = 8; n <= 12; n += 4) {
31140 for (size_t k = 1; k <= 40; k += 9) {
31141 GemmMicrokernelTester()
31142 .mr(1)
31143 .nr(4)
31144 .kr(2)
31145 .sr(1)
31146 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031147 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031148 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031149 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031150 }
31151 }
31152 }
31153
31154 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
31155 for (uint32_t n = 8; n <= 12; n += 4) {
31156 for (size_t k = 1; k <= 40; k += 9) {
31157 GemmMicrokernelTester()
31158 .mr(1)
31159 .nr(4)
31160 .kr(2)
31161 .sr(1)
31162 .m(1)
31163 .n(n)
31164 .k(k)
31165 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031166 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031167 }
31168 }
31169 }
31170
31171 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
31172 for (uint32_t n = 8; n <= 12; n += 4) {
31173 for (size_t k = 1; k <= 40; k += 9) {
31174 for (uint32_t m = 1; m <= 1; m++) {
31175 GemmMicrokernelTester()
31176 .mr(1)
31177 .nr(4)
31178 .kr(2)
31179 .sr(1)
31180 .m(m)
31181 .n(n)
31182 .k(k)
31183 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031184 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031185 }
31186 }
31187 }
31188 }
31189
31190 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, small_kernel) {
31191 for (size_t k = 1; k <= 40; k += 9) {
31192 GemmMicrokernelTester()
31193 .mr(1)
31194 .nr(4)
31195 .kr(2)
31196 .sr(1)
31197 .m(1)
31198 .n(4)
31199 .k(k)
31200 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080031201 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031202 }
31203 }
31204
31205 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
31206 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031207 for (uint32_t n = 1; n <= 4; n++) {
31208 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031209 GemmMicrokernelTester()
31210 .mr(1)
31211 .nr(4)
31212 .kr(2)
31213 .sr(1)
31214 .m(m)
31215 .n(n)
31216 .k(k)
31217 .ks(3)
31218 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031219 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031220 }
31221 }
31222 }
31223 }
31224
31225 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
31226 for (uint32_t n = 5; n < 8; n++) {
31227 for (size_t k = 1; k <= 40; k += 9) {
31228 GemmMicrokernelTester()
31229 .mr(1)
31230 .nr(4)
31231 .kr(2)
31232 .sr(1)
31233 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031234 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031235 .k(k)
31236 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080031237 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031238 }
31239 }
31240 }
31241
31242 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
31243 for (uint32_t n = 8; n <= 12; n += 4) {
31244 for (size_t k = 1; k <= 40; k += 9) {
31245 GemmMicrokernelTester()
31246 .mr(1)
31247 .nr(4)
31248 .kr(2)
31249 .sr(1)
31250 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031251 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031252 .k(k)
31253 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080031254 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031255 }
31256 }
31257 }
31258
31259 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
31260 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031261 for (uint32_t n = 1; n <= 4; n++) {
31262 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031263 GemmMicrokernelTester()
31264 .mr(1)
31265 .nr(4)
31266 .kr(2)
31267 .sr(1)
31268 .m(m)
31269 .n(n)
31270 .k(k)
31271 .cm_stride(7)
31272 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031273 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031274 }
31275 }
31276 }
31277 }
31278
31279 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, a_offset) {
31280 for (size_t k = 1; k <= 40; k += 9) {
31281 GemmMicrokernelTester()
31282 .mr(1)
31283 .nr(4)
31284 .kr(2)
31285 .sr(1)
31286 .m(1)
31287 .n(4)
31288 .k(k)
31289 .ks(3)
31290 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080031291 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031292 }
31293 }
31294
31295 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031296 for (size_t k = 1; k <= 40; k += 9) {
31297 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031298 GemmMicrokernelTester()
31299 .mr(1)
31300 .nr(4)
31301 .kr(2)
31302 .sr(1)
31303 .m(1)
31304 .n(4)
31305 .k(k)
31306 .ks(3)
31307 .a_offset(43)
31308 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080031309 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031310 }
31311 }
31312 }
31313
31314 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
31315 GemmMicrokernelTester()
31316 .mr(1)
31317 .nr(4)
31318 .kr(2)
31319 .sr(1)
31320 .m(1)
31321 .n(4)
31322 .k(8)
31323 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080031324 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031325 }
31326
31327 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
31328 GemmMicrokernelTester()
31329 .mr(1)
31330 .nr(4)
31331 .kr(2)
31332 .sr(1)
31333 .m(1)
31334 .n(4)
31335 .k(8)
31336 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080031337 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031338 }
31339
31340 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
31341 GemmMicrokernelTester()
31342 .mr(1)
31343 .nr(4)
31344 .kr(2)
31345 .sr(1)
31346 .m(1)
31347 .n(4)
31348 .k(8)
31349 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031350 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031351 }
Marat Dukhan4c617792021-12-21 15:47:58 -080031352#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031353
31354
Marat Dukhan4c617792021-12-21 15:47:58 -080031355#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031356 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
31357 GemmMicrokernelTester()
31358 .mr(3)
31359 .nr(4)
31360 .kr(2)
31361 .sr(1)
31362 .m(3)
31363 .n(4)
31364 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080031365 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031366 }
31367
31368 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
31369 GemmMicrokernelTester()
31370 .mr(3)
31371 .nr(4)
31372 .kr(2)
31373 .sr(1)
31374 .m(3)
31375 .n(4)
31376 .k(8)
31377 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031378 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031379 }
31380
31381 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031382 for (uint32_t n = 1; n <= 4; n++) {
31383 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031384 GemmMicrokernelTester()
31385 .mr(3)
31386 .nr(4)
31387 .kr(2)
31388 .sr(1)
31389 .m(m)
31390 .n(n)
31391 .k(8)
31392 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031393 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031394 }
31395 }
31396 }
31397
31398 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
31399 for (uint32_t m = 1; m <= 3; m++) {
31400 GemmMicrokernelTester()
31401 .mr(3)
31402 .nr(4)
31403 .kr(2)
31404 .sr(1)
31405 .m(m)
31406 .n(4)
31407 .k(8)
31408 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031409 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031410 }
31411 }
31412
31413 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
31414 for (uint32_t n = 1; n <= 4; n++) {
31415 GemmMicrokernelTester()
31416 .mr(3)
31417 .nr(4)
31418 .kr(2)
31419 .sr(1)
31420 .m(3)
31421 .n(n)
31422 .k(8)
31423 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031424 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031425 }
31426 }
31427
31428 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
31429 for (size_t k = 1; k < 8; k++) {
31430 GemmMicrokernelTester()
31431 .mr(3)
31432 .nr(4)
31433 .kr(2)
31434 .sr(1)
31435 .m(3)
31436 .n(4)
31437 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031438 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031439 }
31440 }
31441
31442 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
31443 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031444 for (uint32_t n = 1; n <= 4; n++) {
31445 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031446 GemmMicrokernelTester()
31447 .mr(3)
31448 .nr(4)
31449 .kr(2)
31450 .sr(1)
31451 .m(m)
31452 .n(n)
31453 .k(k)
31454 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031455 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031456 }
31457 }
31458 }
31459 }
31460
31461 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
31462 for (size_t k = 9; k < 16; k++) {
31463 GemmMicrokernelTester()
31464 .mr(3)
31465 .nr(4)
31466 .kr(2)
31467 .sr(1)
31468 .m(3)
31469 .n(4)
31470 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031471 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031472 }
31473 }
31474
31475 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
31476 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031477 for (uint32_t n = 1; n <= 4; n++) {
31478 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031479 GemmMicrokernelTester()
31480 .mr(3)
31481 .nr(4)
31482 .kr(2)
31483 .sr(1)
31484 .m(m)
31485 .n(n)
31486 .k(k)
31487 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031488 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031489 }
31490 }
31491 }
31492 }
31493
31494 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
31495 for (size_t k = 16; k <= 80; k += 8) {
31496 GemmMicrokernelTester()
31497 .mr(3)
31498 .nr(4)
31499 .kr(2)
31500 .sr(1)
31501 .m(3)
31502 .n(4)
31503 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031504 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031505 }
31506 }
31507
31508 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
31509 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031510 for (uint32_t n = 1; n <= 4; n++) {
31511 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031512 GemmMicrokernelTester()
31513 .mr(3)
31514 .nr(4)
31515 .kr(2)
31516 .sr(1)
31517 .m(m)
31518 .n(n)
31519 .k(k)
31520 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031521 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031522 }
31523 }
31524 }
31525 }
31526
31527 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
31528 for (uint32_t n = 5; n < 8; n++) {
31529 for (size_t k = 1; k <= 40; k += 9) {
31530 GemmMicrokernelTester()
31531 .mr(3)
31532 .nr(4)
31533 .kr(2)
31534 .sr(1)
31535 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031536 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031537 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031538 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031539 }
31540 }
31541 }
31542
31543 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
31544 for (uint32_t n = 5; n < 8; n++) {
31545 for (size_t k = 1; k <= 40; k += 9) {
31546 GemmMicrokernelTester()
31547 .mr(3)
31548 .nr(4)
31549 .kr(2)
31550 .sr(1)
31551 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031552 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031553 .k(k)
31554 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031555 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031556 }
31557 }
31558 }
31559
31560 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
31561 for (uint32_t n = 5; n < 8; n++) {
31562 for (size_t k = 1; k <= 40; k += 9) {
31563 for (uint32_t m = 1; m <= 3; m++) {
31564 GemmMicrokernelTester()
31565 .mr(3)
31566 .nr(4)
31567 .kr(2)
31568 .sr(1)
31569 .m(m)
31570 .n(n)
31571 .k(k)
31572 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031573 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031574 }
31575 }
31576 }
31577 }
31578
31579 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
31580 for (uint32_t n = 8; n <= 12; n += 4) {
31581 for (size_t k = 1; k <= 40; k += 9) {
31582 GemmMicrokernelTester()
31583 .mr(3)
31584 .nr(4)
31585 .kr(2)
31586 .sr(1)
31587 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031588 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031589 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031590 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031591 }
31592 }
31593 }
31594
31595 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
31596 for (uint32_t n = 8; n <= 12; n += 4) {
31597 for (size_t k = 1; k <= 40; k += 9) {
31598 GemmMicrokernelTester()
31599 .mr(3)
31600 .nr(4)
31601 .kr(2)
31602 .sr(1)
31603 .m(3)
31604 .n(n)
31605 .k(k)
31606 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031607 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031608 }
31609 }
31610 }
31611
31612 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
31613 for (uint32_t n = 8; n <= 12; n += 4) {
31614 for (size_t k = 1; k <= 40; k += 9) {
31615 for (uint32_t m = 1; m <= 3; m++) {
31616 GemmMicrokernelTester()
31617 .mr(3)
31618 .nr(4)
31619 .kr(2)
31620 .sr(1)
31621 .m(m)
31622 .n(n)
31623 .k(k)
31624 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031625 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031626 }
31627 }
31628 }
31629 }
31630
31631 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, small_kernel) {
31632 for (size_t k = 1; k <= 40; k += 9) {
31633 GemmMicrokernelTester()
31634 .mr(3)
31635 .nr(4)
31636 .kr(2)
31637 .sr(1)
31638 .m(3)
31639 .n(4)
31640 .k(k)
31641 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080031642 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031643 }
31644 }
31645
31646 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
31647 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031648 for (uint32_t n = 1; n <= 4; n++) {
31649 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031650 GemmMicrokernelTester()
31651 .mr(3)
31652 .nr(4)
31653 .kr(2)
31654 .sr(1)
31655 .m(m)
31656 .n(n)
31657 .k(k)
31658 .ks(3)
31659 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031660 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031661 }
31662 }
31663 }
31664 }
31665
31666 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
31667 for (uint32_t n = 5; n < 8; n++) {
31668 for (size_t k = 1; k <= 40; k += 9) {
31669 GemmMicrokernelTester()
31670 .mr(3)
31671 .nr(4)
31672 .kr(2)
31673 .sr(1)
31674 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031675 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031676 .k(k)
31677 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080031678 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031679 }
31680 }
31681 }
31682
31683 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
31684 for (uint32_t n = 8; n <= 12; n += 4) {
31685 for (size_t k = 1; k <= 40; k += 9) {
31686 GemmMicrokernelTester()
31687 .mr(3)
31688 .nr(4)
31689 .kr(2)
31690 .sr(1)
31691 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031692 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031693 .k(k)
31694 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080031695 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031696 }
31697 }
31698 }
31699
31700 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
31701 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031702 for (uint32_t n = 1; n <= 4; n++) {
31703 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031704 GemmMicrokernelTester()
31705 .mr(3)
31706 .nr(4)
31707 .kr(2)
31708 .sr(1)
31709 .m(m)
31710 .n(n)
31711 .k(k)
31712 .cm_stride(7)
31713 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031714 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031715 }
31716 }
31717 }
31718 }
31719
31720 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, a_offset) {
31721 for (size_t k = 1; k <= 40; k += 9) {
31722 GemmMicrokernelTester()
31723 .mr(3)
31724 .nr(4)
31725 .kr(2)
31726 .sr(1)
31727 .m(3)
31728 .n(4)
31729 .k(k)
31730 .ks(3)
31731 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080031732 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031733 }
31734 }
31735
31736 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031737 for (size_t k = 1; k <= 40; k += 9) {
31738 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031739 GemmMicrokernelTester()
31740 .mr(3)
31741 .nr(4)
31742 .kr(2)
31743 .sr(1)
31744 .m(3)
31745 .n(4)
31746 .k(k)
31747 .ks(3)
31748 .a_offset(127)
31749 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080031750 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031751 }
31752 }
31753 }
31754
31755 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
31756 GemmMicrokernelTester()
31757 .mr(3)
31758 .nr(4)
31759 .kr(2)
31760 .sr(1)
31761 .m(3)
31762 .n(4)
31763 .k(8)
31764 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080031765 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031766 }
31767
31768 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
31769 GemmMicrokernelTester()
31770 .mr(3)
31771 .nr(4)
31772 .kr(2)
31773 .sr(1)
31774 .m(3)
31775 .n(4)
31776 .k(8)
31777 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080031778 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031779 }
31780
31781 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
31782 GemmMicrokernelTester()
31783 .mr(3)
31784 .nr(4)
31785 .kr(2)
31786 .sr(1)
31787 .m(3)
31788 .n(4)
31789 .k(8)
31790 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031791 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031792 }
Marat Dukhan4c617792021-12-21 15:47:58 -080031793#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031794
31795
Marat Dukhan4c617792021-12-21 15:47:58 -080031796#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031797 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
31798 GemmMicrokernelTester()
31799 .mr(1)
31800 .nr(4)
31801 .kr(2)
31802 .sr(1)
31803 .m(1)
31804 .n(4)
31805 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080031806 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031807 }
31808
31809 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
31810 GemmMicrokernelTester()
31811 .mr(1)
31812 .nr(4)
31813 .kr(2)
31814 .sr(1)
31815 .m(1)
31816 .n(4)
31817 .k(8)
31818 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031819 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031820 }
31821
31822 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031823 for (uint32_t n = 1; n <= 4; n++) {
31824 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031825 GemmMicrokernelTester()
31826 .mr(1)
31827 .nr(4)
31828 .kr(2)
31829 .sr(1)
31830 .m(m)
31831 .n(n)
31832 .k(8)
31833 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031834 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031835 }
31836 }
31837 }
31838
31839 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
31840 for (uint32_t m = 1; m <= 1; m++) {
31841 GemmMicrokernelTester()
31842 .mr(1)
31843 .nr(4)
31844 .kr(2)
31845 .sr(1)
31846 .m(m)
31847 .n(4)
31848 .k(8)
31849 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031850 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031851 }
31852 }
31853
31854 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
31855 for (uint32_t n = 1; n <= 4; n++) {
31856 GemmMicrokernelTester()
31857 .mr(1)
31858 .nr(4)
31859 .kr(2)
31860 .sr(1)
31861 .m(1)
31862 .n(n)
31863 .k(8)
31864 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031865 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031866 }
31867 }
31868
31869 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
31870 for (size_t k = 1; k < 8; k++) {
31871 GemmMicrokernelTester()
31872 .mr(1)
31873 .nr(4)
31874 .kr(2)
31875 .sr(1)
31876 .m(1)
31877 .n(4)
31878 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031879 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031880 }
31881 }
31882
31883 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
31884 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031885 for (uint32_t n = 1; n <= 4; n++) {
31886 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031887 GemmMicrokernelTester()
31888 .mr(1)
31889 .nr(4)
31890 .kr(2)
31891 .sr(1)
31892 .m(m)
31893 .n(n)
31894 .k(k)
31895 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031896 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031897 }
31898 }
31899 }
31900 }
31901
31902 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
31903 for (size_t k = 9; k < 16; k++) {
31904 GemmMicrokernelTester()
31905 .mr(1)
31906 .nr(4)
31907 .kr(2)
31908 .sr(1)
31909 .m(1)
31910 .n(4)
31911 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031912 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031913 }
31914 }
31915
31916 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
31917 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031918 for (uint32_t n = 1; n <= 4; n++) {
31919 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031920 GemmMicrokernelTester()
31921 .mr(1)
31922 .nr(4)
31923 .kr(2)
31924 .sr(1)
31925 .m(m)
31926 .n(n)
31927 .k(k)
31928 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031929 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031930 }
31931 }
31932 }
31933 }
31934
31935 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
31936 for (size_t k = 16; k <= 80; k += 8) {
31937 GemmMicrokernelTester()
31938 .mr(1)
31939 .nr(4)
31940 .kr(2)
31941 .sr(1)
31942 .m(1)
31943 .n(4)
31944 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031945 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031946 }
31947 }
31948
31949 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
31950 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031951 for (uint32_t n = 1; n <= 4; n++) {
31952 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031953 GemmMicrokernelTester()
31954 .mr(1)
31955 .nr(4)
31956 .kr(2)
31957 .sr(1)
31958 .m(m)
31959 .n(n)
31960 .k(k)
31961 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031962 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031963 }
31964 }
31965 }
31966 }
31967
31968 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
31969 for (uint32_t n = 5; n < 8; n++) {
31970 for (size_t k = 1; k <= 40; k += 9) {
31971 GemmMicrokernelTester()
31972 .mr(1)
31973 .nr(4)
31974 .kr(2)
31975 .sr(1)
31976 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031977 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031978 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031979 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031980 }
31981 }
31982 }
31983
31984 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
31985 for (uint32_t n = 5; n < 8; n++) {
31986 for (size_t k = 1; k <= 40; k += 9) {
31987 GemmMicrokernelTester()
31988 .mr(1)
31989 .nr(4)
31990 .kr(2)
31991 .sr(1)
31992 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031993 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031994 .k(k)
31995 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031996 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070031997 }
31998 }
31999 }
32000
32001 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
32002 for (uint32_t n = 5; n < 8; n++) {
32003 for (size_t k = 1; k <= 40; k += 9) {
32004 for (uint32_t m = 1; m <= 1; m++) {
32005 GemmMicrokernelTester()
32006 .mr(1)
32007 .nr(4)
32008 .kr(2)
32009 .sr(1)
32010 .m(m)
32011 .n(n)
32012 .k(k)
32013 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032014 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032015 }
32016 }
32017 }
32018 }
32019
32020 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
32021 for (uint32_t n = 8; n <= 12; n += 4) {
32022 for (size_t k = 1; k <= 40; k += 9) {
32023 GemmMicrokernelTester()
32024 .mr(1)
32025 .nr(4)
32026 .kr(2)
32027 .sr(1)
32028 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032029 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032030 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032031 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032032 }
32033 }
32034 }
32035
32036 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
32037 for (uint32_t n = 8; n <= 12; n += 4) {
32038 for (size_t k = 1; k <= 40; k += 9) {
32039 GemmMicrokernelTester()
32040 .mr(1)
32041 .nr(4)
32042 .kr(2)
32043 .sr(1)
32044 .m(1)
32045 .n(n)
32046 .k(k)
32047 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080032048 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032049 }
32050 }
32051 }
32052
32053 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
32054 for (uint32_t n = 8; n <= 12; n += 4) {
32055 for (size_t k = 1; k <= 40; k += 9) {
32056 for (uint32_t m = 1; m <= 1; m++) {
32057 GemmMicrokernelTester()
32058 .mr(1)
32059 .nr(4)
32060 .kr(2)
32061 .sr(1)
32062 .m(m)
32063 .n(n)
32064 .k(k)
32065 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032066 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032067 }
32068 }
32069 }
32070 }
32071
32072 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, small_kernel) {
32073 for (size_t k = 1; k <= 40; k += 9) {
32074 GemmMicrokernelTester()
32075 .mr(1)
32076 .nr(4)
32077 .kr(2)
32078 .sr(1)
32079 .m(1)
32080 .n(4)
32081 .k(k)
32082 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080032083 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032084 }
32085 }
32086
32087 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
32088 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032089 for (uint32_t n = 1; n <= 4; n++) {
32090 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032091 GemmMicrokernelTester()
32092 .mr(1)
32093 .nr(4)
32094 .kr(2)
32095 .sr(1)
32096 .m(m)
32097 .n(n)
32098 .k(k)
32099 .ks(3)
32100 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032101 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032102 }
32103 }
32104 }
32105 }
32106
32107 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
32108 for (uint32_t n = 5; n < 8; n++) {
32109 for (size_t k = 1; k <= 40; k += 9) {
32110 GemmMicrokernelTester()
32111 .mr(1)
32112 .nr(4)
32113 .kr(2)
32114 .sr(1)
32115 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032116 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032117 .k(k)
32118 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080032119 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032120 }
32121 }
32122 }
32123
32124 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
32125 for (uint32_t n = 8; n <= 12; n += 4) {
32126 for (size_t k = 1; k <= 40; k += 9) {
32127 GemmMicrokernelTester()
32128 .mr(1)
32129 .nr(4)
32130 .kr(2)
32131 .sr(1)
32132 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032133 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032134 .k(k)
32135 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080032136 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032137 }
32138 }
32139 }
32140
32141 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
32142 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032143 for (uint32_t n = 1; n <= 4; n++) {
32144 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032145 GemmMicrokernelTester()
32146 .mr(1)
32147 .nr(4)
32148 .kr(2)
32149 .sr(1)
32150 .m(m)
32151 .n(n)
32152 .k(k)
32153 .cm_stride(7)
32154 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032155 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032156 }
32157 }
32158 }
32159 }
32160
32161 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, a_offset) {
32162 for (size_t k = 1; k <= 40; k += 9) {
32163 GemmMicrokernelTester()
32164 .mr(1)
32165 .nr(4)
32166 .kr(2)
32167 .sr(1)
32168 .m(1)
32169 .n(4)
32170 .k(k)
32171 .ks(3)
32172 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080032173 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032174 }
32175 }
32176
32177 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032178 for (size_t k = 1; k <= 40; k += 9) {
32179 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032180 GemmMicrokernelTester()
32181 .mr(1)
32182 .nr(4)
32183 .kr(2)
32184 .sr(1)
32185 .m(1)
32186 .n(4)
32187 .k(k)
32188 .ks(3)
32189 .a_offset(43)
32190 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080032191 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032192 }
32193 }
32194 }
32195
32196 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
32197 GemmMicrokernelTester()
32198 .mr(1)
32199 .nr(4)
32200 .kr(2)
32201 .sr(1)
32202 .m(1)
32203 .n(4)
32204 .k(8)
32205 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080032206 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032207 }
32208
32209 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
32210 GemmMicrokernelTester()
32211 .mr(1)
32212 .nr(4)
32213 .kr(2)
32214 .sr(1)
32215 .m(1)
32216 .n(4)
32217 .k(8)
32218 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080032219 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032220 }
32221
32222 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
32223 GemmMicrokernelTester()
32224 .mr(1)
32225 .nr(4)
32226 .kr(2)
32227 .sr(1)
32228 .m(1)
32229 .n(4)
32230 .k(8)
32231 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080032232 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032233 }
Marat Dukhan4c617792021-12-21 15:47:58 -080032234#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032235
32236
Marat Dukhan4c617792021-12-21 15:47:58 -080032237#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032238 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
32239 GemmMicrokernelTester()
32240 .mr(2)
32241 .nr(4)
32242 .kr(2)
32243 .sr(1)
32244 .m(2)
32245 .n(4)
32246 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080032247 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032248 }
32249
32250 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
32251 GemmMicrokernelTester()
32252 .mr(2)
32253 .nr(4)
32254 .kr(2)
32255 .sr(1)
32256 .m(2)
32257 .n(4)
32258 .k(8)
32259 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080032260 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032261 }
32262
32263 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032264 for (uint32_t n = 1; n <= 4; n++) {
32265 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032266 GemmMicrokernelTester()
32267 .mr(2)
32268 .nr(4)
32269 .kr(2)
32270 .sr(1)
32271 .m(m)
32272 .n(n)
32273 .k(8)
32274 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032275 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032276 }
32277 }
32278 }
32279
32280 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
32281 for (uint32_t m = 1; m <= 2; m++) {
32282 GemmMicrokernelTester()
32283 .mr(2)
32284 .nr(4)
32285 .kr(2)
32286 .sr(1)
32287 .m(m)
32288 .n(4)
32289 .k(8)
32290 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032291 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032292 }
32293 }
32294
32295 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
32296 for (uint32_t n = 1; n <= 4; n++) {
32297 GemmMicrokernelTester()
32298 .mr(2)
32299 .nr(4)
32300 .kr(2)
32301 .sr(1)
32302 .m(2)
32303 .n(n)
32304 .k(8)
32305 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032306 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032307 }
32308 }
32309
32310 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
32311 for (size_t k = 1; k < 8; k++) {
32312 GemmMicrokernelTester()
32313 .mr(2)
32314 .nr(4)
32315 .kr(2)
32316 .sr(1)
32317 .m(2)
32318 .n(4)
32319 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032320 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032321 }
32322 }
32323
32324 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
32325 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032326 for (uint32_t n = 1; n <= 4; n++) {
32327 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032328 GemmMicrokernelTester()
32329 .mr(2)
32330 .nr(4)
32331 .kr(2)
32332 .sr(1)
32333 .m(m)
32334 .n(n)
32335 .k(k)
32336 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032337 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032338 }
32339 }
32340 }
32341 }
32342
32343 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
32344 for (size_t k = 9; k < 16; k++) {
32345 GemmMicrokernelTester()
32346 .mr(2)
32347 .nr(4)
32348 .kr(2)
32349 .sr(1)
32350 .m(2)
32351 .n(4)
32352 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032353 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032354 }
32355 }
32356
32357 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
32358 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032359 for (uint32_t n = 1; n <= 4; n++) {
32360 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032361 GemmMicrokernelTester()
32362 .mr(2)
32363 .nr(4)
32364 .kr(2)
32365 .sr(1)
32366 .m(m)
32367 .n(n)
32368 .k(k)
32369 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032370 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032371 }
32372 }
32373 }
32374 }
32375
32376 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
32377 for (size_t k = 16; k <= 80; k += 8) {
32378 GemmMicrokernelTester()
32379 .mr(2)
32380 .nr(4)
32381 .kr(2)
32382 .sr(1)
32383 .m(2)
32384 .n(4)
32385 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032386 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032387 }
32388 }
32389
32390 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
32391 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032392 for (uint32_t n = 1; n <= 4; n++) {
32393 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032394 GemmMicrokernelTester()
32395 .mr(2)
32396 .nr(4)
32397 .kr(2)
32398 .sr(1)
32399 .m(m)
32400 .n(n)
32401 .k(k)
32402 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032403 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032404 }
32405 }
32406 }
32407 }
32408
32409 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
32410 for (uint32_t n = 5; n < 8; n++) {
32411 for (size_t k = 1; k <= 40; k += 9) {
32412 GemmMicrokernelTester()
32413 .mr(2)
32414 .nr(4)
32415 .kr(2)
32416 .sr(1)
32417 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032418 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032419 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032420 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032421 }
32422 }
32423 }
32424
32425 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
32426 for (uint32_t n = 5; n < 8; n++) {
32427 for (size_t k = 1; k <= 40; k += 9) {
32428 GemmMicrokernelTester()
32429 .mr(2)
32430 .nr(4)
32431 .kr(2)
32432 .sr(1)
32433 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032434 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032435 .k(k)
32436 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080032437 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032438 }
32439 }
32440 }
32441
32442 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
32443 for (uint32_t n = 5; n < 8; n++) {
32444 for (size_t k = 1; k <= 40; k += 9) {
32445 for (uint32_t m = 1; m <= 2; m++) {
32446 GemmMicrokernelTester()
32447 .mr(2)
32448 .nr(4)
32449 .kr(2)
32450 .sr(1)
32451 .m(m)
32452 .n(n)
32453 .k(k)
32454 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032455 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032456 }
32457 }
32458 }
32459 }
32460
32461 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
32462 for (uint32_t n = 8; n <= 12; n += 4) {
32463 for (size_t k = 1; k <= 40; k += 9) {
32464 GemmMicrokernelTester()
32465 .mr(2)
32466 .nr(4)
32467 .kr(2)
32468 .sr(1)
32469 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032470 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032471 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032472 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032473 }
32474 }
32475 }
32476
32477 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
32478 for (uint32_t n = 8; n <= 12; n += 4) {
32479 for (size_t k = 1; k <= 40; k += 9) {
32480 GemmMicrokernelTester()
32481 .mr(2)
32482 .nr(4)
32483 .kr(2)
32484 .sr(1)
32485 .m(2)
32486 .n(n)
32487 .k(k)
32488 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080032489 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032490 }
32491 }
32492 }
32493
32494 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
32495 for (uint32_t n = 8; n <= 12; n += 4) {
32496 for (size_t k = 1; k <= 40; k += 9) {
32497 for (uint32_t m = 1; m <= 2; m++) {
32498 GemmMicrokernelTester()
32499 .mr(2)
32500 .nr(4)
32501 .kr(2)
32502 .sr(1)
32503 .m(m)
32504 .n(n)
32505 .k(k)
32506 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032507 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032508 }
32509 }
32510 }
32511 }
32512
32513 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, small_kernel) {
32514 for (size_t k = 1; k <= 40; k += 9) {
32515 GemmMicrokernelTester()
32516 .mr(2)
32517 .nr(4)
32518 .kr(2)
32519 .sr(1)
32520 .m(2)
32521 .n(4)
32522 .k(k)
32523 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080032524 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032525 }
32526 }
32527
32528 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
32529 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032530 for (uint32_t n = 1; n <= 4; n++) {
32531 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032532 GemmMicrokernelTester()
32533 .mr(2)
32534 .nr(4)
32535 .kr(2)
32536 .sr(1)
32537 .m(m)
32538 .n(n)
32539 .k(k)
32540 .ks(3)
32541 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032542 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032543 }
32544 }
32545 }
32546 }
32547
32548 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
32549 for (uint32_t n = 5; n < 8; n++) {
32550 for (size_t k = 1; k <= 40; k += 9) {
32551 GemmMicrokernelTester()
32552 .mr(2)
32553 .nr(4)
32554 .kr(2)
32555 .sr(1)
32556 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032557 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032558 .k(k)
32559 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080032560 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032561 }
32562 }
32563 }
32564
32565 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
32566 for (uint32_t n = 8; n <= 12; n += 4) {
32567 for (size_t k = 1; k <= 40; k += 9) {
32568 GemmMicrokernelTester()
32569 .mr(2)
32570 .nr(4)
32571 .kr(2)
32572 .sr(1)
32573 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032574 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032575 .k(k)
32576 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080032577 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032578 }
32579 }
32580 }
32581
32582 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
32583 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032584 for (uint32_t n = 1; n <= 4; n++) {
32585 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032586 GemmMicrokernelTester()
32587 .mr(2)
32588 .nr(4)
32589 .kr(2)
32590 .sr(1)
32591 .m(m)
32592 .n(n)
32593 .k(k)
32594 .cm_stride(7)
32595 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032596 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032597 }
32598 }
32599 }
32600 }
32601
32602 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, a_offset) {
32603 for (size_t k = 1; k <= 40; k += 9) {
32604 GemmMicrokernelTester()
32605 .mr(2)
32606 .nr(4)
32607 .kr(2)
32608 .sr(1)
32609 .m(2)
32610 .n(4)
32611 .k(k)
32612 .ks(3)
32613 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080032614 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032615 }
32616 }
32617
32618 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032619 for (size_t k = 1; k <= 40; k += 9) {
32620 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032621 GemmMicrokernelTester()
32622 .mr(2)
32623 .nr(4)
32624 .kr(2)
32625 .sr(1)
32626 .m(2)
32627 .n(4)
32628 .k(k)
32629 .ks(3)
32630 .a_offset(83)
32631 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080032632 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032633 }
32634 }
32635 }
32636
32637 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
32638 GemmMicrokernelTester()
32639 .mr(2)
32640 .nr(4)
32641 .kr(2)
32642 .sr(1)
32643 .m(2)
32644 .n(4)
32645 .k(8)
32646 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080032647 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032648 }
32649
32650 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
32651 GemmMicrokernelTester()
32652 .mr(2)
32653 .nr(4)
32654 .kr(2)
32655 .sr(1)
32656 .m(2)
32657 .n(4)
32658 .k(8)
32659 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080032660 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032661 }
32662
32663 TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
32664 GemmMicrokernelTester()
32665 .mr(2)
32666 .nr(4)
32667 .kr(2)
32668 .sr(1)
32669 .m(2)
32670 .n(4)
32671 .k(8)
32672 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080032673 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032674 }
Marat Dukhan4c617792021-12-21 15:47:58 -080032675#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032676
32677
Marat Dukhan4c617792021-12-21 15:47:58 -080032678#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032679 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
32680 GemmMicrokernelTester()
32681 .mr(3)
32682 .nr(4)
32683 .kr(2)
32684 .sr(1)
32685 .m(3)
32686 .n(4)
32687 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080032688 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032689 }
32690
32691 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
32692 GemmMicrokernelTester()
32693 .mr(3)
32694 .nr(4)
32695 .kr(2)
32696 .sr(1)
32697 .m(3)
32698 .n(4)
32699 .k(8)
32700 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080032701 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032702 }
32703
32704 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032705 for (uint32_t n = 1; n <= 4; n++) {
32706 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032707 GemmMicrokernelTester()
32708 .mr(3)
32709 .nr(4)
32710 .kr(2)
32711 .sr(1)
32712 .m(m)
32713 .n(n)
32714 .k(8)
32715 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032716 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032717 }
32718 }
32719 }
32720
32721 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
32722 for (uint32_t m = 1; m <= 3; m++) {
32723 GemmMicrokernelTester()
32724 .mr(3)
32725 .nr(4)
32726 .kr(2)
32727 .sr(1)
32728 .m(m)
32729 .n(4)
32730 .k(8)
32731 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032732 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032733 }
32734 }
32735
32736 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
32737 for (uint32_t n = 1; n <= 4; n++) {
32738 GemmMicrokernelTester()
32739 .mr(3)
32740 .nr(4)
32741 .kr(2)
32742 .sr(1)
32743 .m(3)
32744 .n(n)
32745 .k(8)
32746 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032747 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032748 }
32749 }
32750
32751 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
32752 for (size_t k = 1; k < 8; k++) {
32753 GemmMicrokernelTester()
32754 .mr(3)
32755 .nr(4)
32756 .kr(2)
32757 .sr(1)
32758 .m(3)
32759 .n(4)
32760 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032761 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032762 }
32763 }
32764
32765 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
32766 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032767 for (uint32_t n = 1; n <= 4; n++) {
32768 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032769 GemmMicrokernelTester()
32770 .mr(3)
32771 .nr(4)
32772 .kr(2)
32773 .sr(1)
32774 .m(m)
32775 .n(n)
32776 .k(k)
32777 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032778 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032779 }
32780 }
32781 }
32782 }
32783
32784 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
32785 for (size_t k = 9; k < 16; k++) {
32786 GemmMicrokernelTester()
32787 .mr(3)
32788 .nr(4)
32789 .kr(2)
32790 .sr(1)
32791 .m(3)
32792 .n(4)
32793 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032794 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032795 }
32796 }
32797
32798 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
32799 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032800 for (uint32_t n = 1; n <= 4; n++) {
32801 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032802 GemmMicrokernelTester()
32803 .mr(3)
32804 .nr(4)
32805 .kr(2)
32806 .sr(1)
32807 .m(m)
32808 .n(n)
32809 .k(k)
32810 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032811 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032812 }
32813 }
32814 }
32815 }
32816
32817 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
32818 for (size_t k = 16; k <= 80; k += 8) {
32819 GemmMicrokernelTester()
32820 .mr(3)
32821 .nr(4)
32822 .kr(2)
32823 .sr(1)
32824 .m(3)
32825 .n(4)
32826 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032827 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032828 }
32829 }
32830
32831 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
32832 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032833 for (uint32_t n = 1; n <= 4; n++) {
32834 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032835 GemmMicrokernelTester()
32836 .mr(3)
32837 .nr(4)
32838 .kr(2)
32839 .sr(1)
32840 .m(m)
32841 .n(n)
32842 .k(k)
32843 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032844 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032845 }
32846 }
32847 }
32848 }
32849
32850 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
32851 for (uint32_t n = 5; n < 8; n++) {
32852 for (size_t k = 1; k <= 40; k += 9) {
32853 GemmMicrokernelTester()
32854 .mr(3)
32855 .nr(4)
32856 .kr(2)
32857 .sr(1)
32858 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032859 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032860 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032861 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032862 }
32863 }
32864 }
32865
32866 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
32867 for (uint32_t n = 5; n < 8; n++) {
32868 for (size_t k = 1; k <= 40; k += 9) {
32869 GemmMicrokernelTester()
32870 .mr(3)
32871 .nr(4)
32872 .kr(2)
32873 .sr(1)
32874 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032875 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032876 .k(k)
32877 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080032878 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032879 }
32880 }
32881 }
32882
32883 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
32884 for (uint32_t n = 5; n < 8; n++) {
32885 for (size_t k = 1; k <= 40; k += 9) {
32886 for (uint32_t m = 1; m <= 3; m++) {
32887 GemmMicrokernelTester()
32888 .mr(3)
32889 .nr(4)
32890 .kr(2)
32891 .sr(1)
32892 .m(m)
32893 .n(n)
32894 .k(k)
32895 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032896 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032897 }
32898 }
32899 }
32900 }
32901
32902 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
32903 for (uint32_t n = 8; n <= 12; n += 4) {
32904 for (size_t k = 1; k <= 40; k += 9) {
32905 GemmMicrokernelTester()
32906 .mr(3)
32907 .nr(4)
32908 .kr(2)
32909 .sr(1)
32910 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032911 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032912 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032913 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032914 }
32915 }
32916 }
32917
32918 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
32919 for (uint32_t n = 8; n <= 12; n += 4) {
32920 for (size_t k = 1; k <= 40; k += 9) {
32921 GemmMicrokernelTester()
32922 .mr(3)
32923 .nr(4)
32924 .kr(2)
32925 .sr(1)
32926 .m(3)
32927 .n(n)
32928 .k(k)
32929 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080032930 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032931 }
32932 }
32933 }
32934
32935 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
32936 for (uint32_t n = 8; n <= 12; n += 4) {
32937 for (size_t k = 1; k <= 40; k += 9) {
32938 for (uint32_t m = 1; m <= 3; m++) {
32939 GemmMicrokernelTester()
32940 .mr(3)
32941 .nr(4)
32942 .kr(2)
32943 .sr(1)
32944 .m(m)
32945 .n(n)
32946 .k(k)
32947 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032948 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032949 }
32950 }
32951 }
32952 }
32953
32954 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, small_kernel) {
32955 for (size_t k = 1; k <= 40; k += 9) {
32956 GemmMicrokernelTester()
32957 .mr(3)
32958 .nr(4)
32959 .kr(2)
32960 .sr(1)
32961 .m(3)
32962 .n(4)
32963 .k(k)
32964 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080032965 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032966 }
32967 }
32968
32969 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
32970 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032971 for (uint32_t n = 1; n <= 4; n++) {
32972 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032973 GemmMicrokernelTester()
32974 .mr(3)
32975 .nr(4)
32976 .kr(2)
32977 .sr(1)
32978 .m(m)
32979 .n(n)
32980 .k(k)
32981 .ks(3)
32982 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032983 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032984 }
32985 }
32986 }
32987 }
32988
32989 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
32990 for (uint32_t n = 5; n < 8; n++) {
32991 for (size_t k = 1; k <= 40; k += 9) {
32992 GemmMicrokernelTester()
32993 .mr(3)
32994 .nr(4)
32995 .kr(2)
32996 .sr(1)
32997 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032998 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070032999 .k(k)
33000 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080033001 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070033002 }
33003 }
33004 }
33005
33006 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
33007 for (uint32_t n = 8; n <= 12; n += 4) {
33008 for (size_t k = 1; k <= 40; k += 9) {
33009 GemmMicrokernelTester()
33010 .mr(3)
33011 .nr(4)
33012 .kr(2)
33013 .sr(1)
33014 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033015 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070033016 .k(k)
33017 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080033018 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070033019 }
33020 }
33021 }
33022
33023 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
33024 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033025 for (uint32_t n = 1; n <= 4; n++) {
33026 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070033027 GemmMicrokernelTester()
33028 .mr(3)
33029 .nr(4)
33030 .kr(2)
33031 .sr(1)
33032 .m(m)
33033 .n(n)
33034 .k(k)
33035 .cm_stride(7)
33036 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033037 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070033038 }
33039 }
33040 }
33041 }
33042
33043 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, a_offset) {
33044 for (size_t k = 1; k <= 40; k += 9) {
33045 GemmMicrokernelTester()
33046 .mr(3)
33047 .nr(4)
33048 .kr(2)
33049 .sr(1)
33050 .m(3)
33051 .n(4)
33052 .k(k)
33053 .ks(3)
33054 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080033055 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070033056 }
33057 }
33058
33059 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033060 for (size_t k = 1; k <= 40; k += 9) {
33061 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070033062 GemmMicrokernelTester()
33063 .mr(3)
33064 .nr(4)
33065 .kr(2)
33066 .sr(1)
33067 .m(3)
33068 .n(4)
33069 .k(k)
33070 .ks(3)
33071 .a_offset(127)
33072 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080033073 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070033074 }
33075 }
33076 }
33077
33078 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
33079 GemmMicrokernelTester()
33080 .mr(3)
33081 .nr(4)
33082 .kr(2)
33083 .sr(1)
33084 .m(3)
33085 .n(4)
33086 .k(8)
33087 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080033088 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070033089 }
33090
33091 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
33092 GemmMicrokernelTester()
33093 .mr(3)
33094 .nr(4)
33095 .kr(2)
33096 .sr(1)
33097 .m(3)
33098 .n(4)
33099 .k(8)
33100 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080033101 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070033102 }
33103
33104 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
33105 GemmMicrokernelTester()
33106 .mr(3)
33107 .nr(4)
33108 .kr(2)
33109 .sr(1)
33110 .m(3)
33111 .n(4)
33112 .k(8)
33113 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080033114 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070033115 }
Marat Dukhan4c617792021-12-21 15:47:58 -080033116#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070033117
33118
Marat Dukhan4c617792021-12-21 15:47:58 -080033119#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033120 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
33121 GemmMicrokernelTester()
33122 .mr(1)
33123 .nr(4)
33124 .kr(2)
33125 .sr(4)
33126 .m(1)
33127 .n(4)
33128 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080033129 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033130 }
33131
33132 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
33133 GemmMicrokernelTester()
33134 .mr(1)
33135 .nr(4)
33136 .kr(2)
33137 .sr(4)
33138 .m(1)
33139 .n(4)
33140 .k(8)
33141 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080033142 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033143 }
33144
33145 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033146 for (uint32_t n = 1; n <= 4; n++) {
33147 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033148 GemmMicrokernelTester()
33149 .mr(1)
33150 .nr(4)
33151 .kr(2)
33152 .sr(4)
33153 .m(m)
33154 .n(n)
33155 .k(8)
33156 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033157 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033158 }
33159 }
33160 }
33161
33162 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
33163 for (uint32_t m = 1; m <= 1; m++) {
33164 GemmMicrokernelTester()
33165 .mr(1)
33166 .nr(4)
33167 .kr(2)
33168 .sr(4)
33169 .m(m)
33170 .n(4)
33171 .k(8)
33172 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033173 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033174 }
33175 }
33176
33177 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
33178 for (uint32_t n = 1; n <= 4; n++) {
33179 GemmMicrokernelTester()
33180 .mr(1)
33181 .nr(4)
33182 .kr(2)
33183 .sr(4)
33184 .m(1)
33185 .n(n)
33186 .k(8)
33187 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033188 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033189 }
33190 }
33191
33192 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
33193 for (size_t k = 1; k < 8; k++) {
33194 GemmMicrokernelTester()
33195 .mr(1)
33196 .nr(4)
33197 .kr(2)
33198 .sr(4)
33199 .m(1)
33200 .n(4)
33201 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033202 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033203 }
33204 }
33205
33206 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
33207 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033208 for (uint32_t n = 1; n <= 4; n++) {
33209 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033210 GemmMicrokernelTester()
33211 .mr(1)
33212 .nr(4)
33213 .kr(2)
33214 .sr(4)
33215 .m(m)
33216 .n(n)
33217 .k(k)
33218 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033219 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033220 }
33221 }
33222 }
33223 }
33224
33225 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
33226 for (size_t k = 9; k < 16; k++) {
33227 GemmMicrokernelTester()
33228 .mr(1)
33229 .nr(4)
33230 .kr(2)
33231 .sr(4)
33232 .m(1)
33233 .n(4)
33234 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033235 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033236 }
33237 }
33238
33239 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
33240 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033241 for (uint32_t n = 1; n <= 4; n++) {
33242 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033243 GemmMicrokernelTester()
33244 .mr(1)
33245 .nr(4)
33246 .kr(2)
33247 .sr(4)
33248 .m(m)
33249 .n(n)
33250 .k(k)
33251 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033252 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033253 }
33254 }
33255 }
33256 }
33257
33258 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
33259 for (size_t k = 16; k <= 80; k += 8) {
33260 GemmMicrokernelTester()
33261 .mr(1)
33262 .nr(4)
33263 .kr(2)
33264 .sr(4)
33265 .m(1)
33266 .n(4)
33267 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033268 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033269 }
33270 }
33271
33272 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
33273 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033274 for (uint32_t n = 1; n <= 4; n++) {
33275 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033276 GemmMicrokernelTester()
33277 .mr(1)
33278 .nr(4)
33279 .kr(2)
33280 .sr(4)
33281 .m(m)
33282 .n(n)
33283 .k(k)
33284 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033285 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033286 }
33287 }
33288 }
33289 }
33290
33291 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
33292 for (uint32_t n = 5; n < 8; n++) {
33293 for (size_t k = 1; k <= 40; k += 9) {
33294 GemmMicrokernelTester()
33295 .mr(1)
33296 .nr(4)
33297 .kr(2)
33298 .sr(4)
33299 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033300 .n(n)
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033301 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033302 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033303 }
33304 }
33305 }
33306
33307 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
33308 for (uint32_t n = 5; n < 8; n++) {
33309 for (size_t k = 1; k <= 40; k += 9) {
33310 GemmMicrokernelTester()
33311 .mr(1)
33312 .nr(4)
33313 .kr(2)
33314 .sr(4)
33315 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033316 .n(n)
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033317 .k(k)
33318 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080033319 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033320 }
33321 }
33322 }
33323
33324 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
33325 for (uint32_t n = 5; n < 8; n++) {
33326 for (size_t k = 1; k <= 40; k += 9) {
33327 for (uint32_t m = 1; m <= 1; m++) {
33328 GemmMicrokernelTester()
33329 .mr(1)
33330 .nr(4)
33331 .kr(2)
33332 .sr(4)
33333 .m(m)
33334 .n(n)
33335 .k(k)
33336 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033337 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033338 }
33339 }
33340 }
33341 }
33342
33343 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
33344 for (uint32_t n = 8; n <= 12; n += 4) {
33345 for (size_t k = 1; k <= 40; k += 9) {
33346 GemmMicrokernelTester()
33347 .mr(1)
33348 .nr(4)
33349 .kr(2)
33350 .sr(4)
33351 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033352 .n(n)
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033353 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033354 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033355 }
33356 }
33357 }
33358
33359 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
33360 for (uint32_t n = 8; n <= 12; n += 4) {
33361 for (size_t k = 1; k <= 40; k += 9) {
33362 GemmMicrokernelTester()
33363 .mr(1)
33364 .nr(4)
33365 .kr(2)
33366 .sr(4)
33367 .m(1)
33368 .n(n)
33369 .k(k)
33370 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080033371 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033372 }
33373 }
33374 }
33375
33376 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
33377 for (uint32_t n = 8; n <= 12; n += 4) {
33378 for (size_t k = 1; k <= 40; k += 9) {
33379 for (uint32_t m = 1; m <= 1; m++) {
33380 GemmMicrokernelTester()
33381 .mr(1)
33382 .nr(4)
33383 .kr(2)
33384 .sr(4)
33385 .m(m)
33386 .n(n)
33387 .k(k)
33388 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033389 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033390 }
33391 }
33392 }
33393 }
33394
33395 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel) {
33396 for (size_t k = 1; k <= 40; k += 9) {
33397 GemmMicrokernelTester()
33398 .mr(1)
33399 .nr(4)
33400 .kr(2)
33401 .sr(4)
33402 .m(1)
33403 .n(4)
33404 .k(k)
33405 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080033406 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033407 }
33408 }
33409
33410 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
33411 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033412 for (uint32_t n = 1; n <= 4; n++) {
33413 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033414 GemmMicrokernelTester()
33415 .mr(1)
33416 .nr(4)
33417 .kr(2)
33418 .sr(4)
33419 .m(m)
33420 .n(n)
33421 .k(k)
33422 .ks(3)
33423 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033424 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033425 }
33426 }
33427 }
33428 }
33429
33430 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
33431 for (uint32_t n = 5; n < 8; n++) {
33432 for (size_t k = 1; k <= 40; k += 9) {
33433 GemmMicrokernelTester()
33434 .mr(1)
33435 .nr(4)
33436 .kr(2)
33437 .sr(4)
33438 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033439 .n(n)
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033440 .k(k)
33441 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080033442 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033443 }
33444 }
33445 }
33446
33447 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
33448 for (uint32_t n = 8; n <= 12; n += 4) {
33449 for (size_t k = 1; k <= 40; k += 9) {
33450 GemmMicrokernelTester()
33451 .mr(1)
33452 .nr(4)
33453 .kr(2)
33454 .sr(4)
33455 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033456 .n(n)
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033457 .k(k)
33458 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080033459 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033460 }
33461 }
33462 }
33463
33464 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
33465 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033466 for (uint32_t n = 1; n <= 4; n++) {
33467 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033468 GemmMicrokernelTester()
33469 .mr(1)
33470 .nr(4)
33471 .kr(2)
33472 .sr(4)
33473 .m(m)
33474 .n(n)
33475 .k(k)
33476 .cm_stride(7)
33477 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033478 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033479 }
33480 }
33481 }
33482 }
33483
33484 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, a_offset) {
33485 for (size_t k = 1; k <= 40; k += 9) {
33486 GemmMicrokernelTester()
33487 .mr(1)
33488 .nr(4)
33489 .kr(2)
33490 .sr(4)
33491 .m(1)
33492 .n(4)
33493 .k(k)
33494 .ks(3)
33495 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080033496 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033497 }
33498 }
33499
33500 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033501 for (size_t k = 1; k <= 40; k += 9) {
33502 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033503 GemmMicrokernelTester()
33504 .mr(1)
33505 .nr(4)
33506 .kr(2)
33507 .sr(4)
33508 .m(1)
33509 .n(4)
33510 .k(k)
33511 .ks(3)
33512 .a_offset(43)
33513 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080033514 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033515 }
33516 }
33517 }
33518
33519 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
33520 GemmMicrokernelTester()
33521 .mr(1)
33522 .nr(4)
33523 .kr(2)
33524 .sr(4)
33525 .m(1)
33526 .n(4)
33527 .k(8)
33528 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080033529 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033530 }
33531
33532 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
33533 GemmMicrokernelTester()
33534 .mr(1)
33535 .nr(4)
33536 .kr(2)
33537 .sr(4)
33538 .m(1)
33539 .n(4)
33540 .k(8)
33541 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080033542 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033543 }
33544
33545 TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
33546 GemmMicrokernelTester()
33547 .mr(1)
33548 .nr(4)
33549 .kr(2)
33550 .sr(4)
33551 .m(1)
33552 .n(4)
33553 .k(8)
33554 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080033555 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033556 }
Marat Dukhan4c617792021-12-21 15:47:58 -080033557#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033558
33559
Marat Dukhan4c617792021-12-21 15:47:58 -080033560#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033561 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
33562 GemmMicrokernelTester()
33563 .mr(3)
33564 .nr(4)
33565 .kr(2)
33566 .sr(4)
33567 .m(3)
33568 .n(4)
33569 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080033570 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033571 }
33572
33573 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
33574 GemmMicrokernelTester()
33575 .mr(3)
33576 .nr(4)
33577 .kr(2)
33578 .sr(4)
33579 .m(3)
33580 .n(4)
33581 .k(8)
33582 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080033583 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033584 }
33585
33586 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033587 for (uint32_t n = 1; n <= 4; n++) {
33588 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033589 GemmMicrokernelTester()
33590 .mr(3)
33591 .nr(4)
33592 .kr(2)
33593 .sr(4)
33594 .m(m)
33595 .n(n)
33596 .k(8)
33597 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033598 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033599 }
33600 }
33601 }
33602
33603 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
33604 for (uint32_t m = 1; m <= 3; m++) {
33605 GemmMicrokernelTester()
33606 .mr(3)
33607 .nr(4)
33608 .kr(2)
33609 .sr(4)
33610 .m(m)
33611 .n(4)
33612 .k(8)
33613 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033614 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033615 }
33616 }
33617
33618 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
33619 for (uint32_t n = 1; n <= 4; n++) {
33620 GemmMicrokernelTester()
33621 .mr(3)
33622 .nr(4)
33623 .kr(2)
33624 .sr(4)
33625 .m(3)
33626 .n(n)
33627 .k(8)
33628 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033629 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033630 }
33631 }
33632
33633 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
33634 for (size_t k = 1; k < 8; k++) {
33635 GemmMicrokernelTester()
33636 .mr(3)
33637 .nr(4)
33638 .kr(2)
33639 .sr(4)
33640 .m(3)
33641 .n(4)
33642 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033643 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033644 }
33645 }
33646
33647 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
33648 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033649 for (uint32_t n = 1; n <= 4; n++) {
33650 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033651 GemmMicrokernelTester()
33652 .mr(3)
33653 .nr(4)
33654 .kr(2)
33655 .sr(4)
33656 .m(m)
33657 .n(n)
33658 .k(k)
33659 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033660 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033661 }
33662 }
33663 }
33664 }
33665
33666 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
33667 for (size_t k = 9; k < 16; k++) {
33668 GemmMicrokernelTester()
33669 .mr(3)
33670 .nr(4)
33671 .kr(2)
33672 .sr(4)
33673 .m(3)
33674 .n(4)
33675 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033676 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033677 }
33678 }
33679
33680 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
33681 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033682 for (uint32_t n = 1; n <= 4; n++) {
33683 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033684 GemmMicrokernelTester()
33685 .mr(3)
33686 .nr(4)
33687 .kr(2)
33688 .sr(4)
33689 .m(m)
33690 .n(n)
33691 .k(k)
33692 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033693 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033694 }
33695 }
33696 }
33697 }
33698
33699 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
33700 for (size_t k = 16; k <= 80; k += 8) {
33701 GemmMicrokernelTester()
33702 .mr(3)
33703 .nr(4)
33704 .kr(2)
33705 .sr(4)
33706 .m(3)
33707 .n(4)
33708 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033709 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033710 }
33711 }
33712
33713 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
33714 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033715 for (uint32_t n = 1; n <= 4; n++) {
33716 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033717 GemmMicrokernelTester()
33718 .mr(3)
33719 .nr(4)
33720 .kr(2)
33721 .sr(4)
33722 .m(m)
33723 .n(n)
33724 .k(k)
33725 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033726 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033727 }
33728 }
33729 }
33730 }
33731
33732 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
33733 for (uint32_t n = 5; n < 8; n++) {
33734 for (size_t k = 1; k <= 40; k += 9) {
33735 GemmMicrokernelTester()
33736 .mr(3)
33737 .nr(4)
33738 .kr(2)
33739 .sr(4)
33740 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033741 .n(n)
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033742 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033743 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033744 }
33745 }
33746 }
33747
33748 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
33749 for (uint32_t n = 5; n < 8; n++) {
33750 for (size_t k = 1; k <= 40; k += 9) {
33751 GemmMicrokernelTester()
33752 .mr(3)
33753 .nr(4)
33754 .kr(2)
33755 .sr(4)
33756 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033757 .n(n)
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033758 .k(k)
33759 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080033760 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033761 }
33762 }
33763 }
33764
33765 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
33766 for (uint32_t n = 5; n < 8; n++) {
33767 for (size_t k = 1; k <= 40; k += 9) {
33768 for (uint32_t m = 1; m <= 3; m++) {
33769 GemmMicrokernelTester()
33770 .mr(3)
33771 .nr(4)
33772 .kr(2)
33773 .sr(4)
33774 .m(m)
33775 .n(n)
33776 .k(k)
33777 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033778 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033779 }
33780 }
33781 }
33782 }
33783
33784 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
33785 for (uint32_t n = 8; n <= 12; n += 4) {
33786 for (size_t k = 1; k <= 40; k += 9) {
33787 GemmMicrokernelTester()
33788 .mr(3)
33789 .nr(4)
33790 .kr(2)
33791 .sr(4)
33792 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033793 .n(n)
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033794 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033795 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033796 }
33797 }
33798 }
33799
33800 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
33801 for (uint32_t n = 8; n <= 12; n += 4) {
33802 for (size_t k = 1; k <= 40; k += 9) {
33803 GemmMicrokernelTester()
33804 .mr(3)
33805 .nr(4)
33806 .kr(2)
33807 .sr(4)
33808 .m(3)
33809 .n(n)
33810 .k(k)
33811 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080033812 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033813 }
33814 }
33815 }
33816
33817 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
33818 for (uint32_t n = 8; n <= 12; n += 4) {
33819 for (size_t k = 1; k <= 40; k += 9) {
33820 for (uint32_t m = 1; m <= 3; m++) {
33821 GemmMicrokernelTester()
33822 .mr(3)
33823 .nr(4)
33824 .kr(2)
33825 .sr(4)
33826 .m(m)
33827 .n(n)
33828 .k(k)
33829 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033830 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033831 }
33832 }
33833 }
33834 }
33835
33836 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel) {
33837 for (size_t k = 1; k <= 40; k += 9) {
33838 GemmMicrokernelTester()
33839 .mr(3)
33840 .nr(4)
33841 .kr(2)
33842 .sr(4)
33843 .m(3)
33844 .n(4)
33845 .k(k)
33846 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080033847 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033848 }
33849 }
33850
33851 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
33852 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033853 for (uint32_t n = 1; n <= 4; n++) {
33854 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033855 GemmMicrokernelTester()
33856 .mr(3)
33857 .nr(4)
33858 .kr(2)
33859 .sr(4)
33860 .m(m)
33861 .n(n)
33862 .k(k)
33863 .ks(3)
33864 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033865 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033866 }
33867 }
33868 }
33869 }
33870
33871 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
33872 for (uint32_t n = 5; n < 8; n++) {
33873 for (size_t k = 1; k <= 40; k += 9) {
33874 GemmMicrokernelTester()
33875 .mr(3)
33876 .nr(4)
33877 .kr(2)
33878 .sr(4)
33879 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033880 .n(n)
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033881 .k(k)
33882 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080033883 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033884 }
33885 }
33886 }
33887
33888 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
33889 for (uint32_t n = 8; n <= 12; n += 4) {
33890 for (size_t k = 1; k <= 40; k += 9) {
33891 GemmMicrokernelTester()
33892 .mr(3)
33893 .nr(4)
33894 .kr(2)
33895 .sr(4)
33896 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033897 .n(n)
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033898 .k(k)
33899 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080033900 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033901 }
33902 }
33903 }
33904
33905 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
33906 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033907 for (uint32_t n = 1; n <= 4; n++) {
33908 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033909 GemmMicrokernelTester()
33910 .mr(3)
33911 .nr(4)
33912 .kr(2)
33913 .sr(4)
33914 .m(m)
33915 .n(n)
33916 .k(k)
33917 .cm_stride(7)
33918 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033919 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033920 }
33921 }
33922 }
33923 }
33924
33925 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, a_offset) {
33926 for (size_t k = 1; k <= 40; k += 9) {
33927 GemmMicrokernelTester()
33928 .mr(3)
33929 .nr(4)
33930 .kr(2)
33931 .sr(4)
33932 .m(3)
33933 .n(4)
33934 .k(k)
33935 .ks(3)
33936 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080033937 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033938 }
33939 }
33940
33941 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033942 for (size_t k = 1; k <= 40; k += 9) {
33943 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033944 GemmMicrokernelTester()
33945 .mr(3)
33946 .nr(4)
33947 .kr(2)
33948 .sr(4)
33949 .m(3)
33950 .n(4)
33951 .k(k)
33952 .ks(3)
33953 .a_offset(127)
33954 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080033955 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033956 }
33957 }
33958 }
33959
33960 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
33961 GemmMicrokernelTester()
33962 .mr(3)
33963 .nr(4)
33964 .kr(2)
33965 .sr(4)
33966 .m(3)
33967 .n(4)
33968 .k(8)
33969 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080033970 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033971 }
33972
33973 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
33974 GemmMicrokernelTester()
33975 .mr(3)
33976 .nr(4)
33977 .kr(2)
33978 .sr(4)
33979 .m(3)
33980 .n(4)
33981 .k(8)
33982 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080033983 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033984 }
33985
33986 TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
33987 GemmMicrokernelTester()
33988 .mr(3)
33989 .nr(4)
33990 .kr(2)
33991 .sr(4)
33992 .m(3)
33993 .n(4)
33994 .k(8)
33995 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080033996 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033997 }
Marat Dukhan4c617792021-12-21 15:47:58 -080033998#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan0f1ed942021-12-08 23:25:50 -080033999
34000
Marat Dukhan4c617792021-12-21 15:47:58 -080034001#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034002 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
34003 GemmMicrokernelTester()
34004 .mr(2)
34005 .nr(4)
34006 .kr(8)
34007 .sr(1)
34008 .m(2)
34009 .n(4)
34010 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080034011 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034012 }
34013
34014 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
34015 GemmMicrokernelTester()
34016 .mr(2)
34017 .nr(4)
34018 .kr(8)
34019 .sr(1)
34020 .m(2)
34021 .n(4)
34022 .k(8)
34023 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080034024 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034025 }
34026
34027 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034028 for (uint32_t n = 1; n <= 4; n++) {
34029 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034030 GemmMicrokernelTester()
34031 .mr(2)
34032 .nr(4)
34033 .kr(8)
34034 .sr(1)
34035 .m(m)
34036 .n(n)
34037 .k(8)
34038 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034039 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034040 }
34041 }
34042 }
34043
34044 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
34045 for (uint32_t m = 1; m <= 2; m++) {
34046 GemmMicrokernelTester()
34047 .mr(2)
34048 .nr(4)
34049 .kr(8)
34050 .sr(1)
34051 .m(m)
34052 .n(4)
34053 .k(8)
34054 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034055 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034056 }
34057 }
34058
34059 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
34060 for (uint32_t n = 1; n <= 4; n++) {
34061 GemmMicrokernelTester()
34062 .mr(2)
34063 .nr(4)
34064 .kr(8)
34065 .sr(1)
34066 .m(2)
34067 .n(n)
34068 .k(8)
34069 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034070 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034071 }
34072 }
34073
34074 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
34075 for (size_t k = 1; k < 8; k++) {
34076 GemmMicrokernelTester()
34077 .mr(2)
34078 .nr(4)
34079 .kr(8)
34080 .sr(1)
34081 .m(2)
34082 .n(4)
34083 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034084 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034085 }
34086 }
34087
34088 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
34089 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034090 for (uint32_t n = 1; n <= 4; n++) {
34091 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034092 GemmMicrokernelTester()
34093 .mr(2)
34094 .nr(4)
34095 .kr(8)
34096 .sr(1)
34097 .m(m)
34098 .n(n)
34099 .k(k)
34100 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034101 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034102 }
34103 }
34104 }
34105 }
34106
34107 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
34108 for (size_t k = 9; k < 16; k++) {
34109 GemmMicrokernelTester()
34110 .mr(2)
34111 .nr(4)
34112 .kr(8)
34113 .sr(1)
34114 .m(2)
34115 .n(4)
34116 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034117 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034118 }
34119 }
34120
34121 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
34122 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034123 for (uint32_t n = 1; n <= 4; n++) {
34124 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034125 GemmMicrokernelTester()
34126 .mr(2)
34127 .nr(4)
34128 .kr(8)
34129 .sr(1)
34130 .m(m)
34131 .n(n)
34132 .k(k)
34133 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034134 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034135 }
34136 }
34137 }
34138 }
34139
34140 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
34141 for (size_t k = 16; k <= 80; k += 8) {
34142 GemmMicrokernelTester()
34143 .mr(2)
34144 .nr(4)
34145 .kr(8)
34146 .sr(1)
34147 .m(2)
34148 .n(4)
34149 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034150 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034151 }
34152 }
34153
34154 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
34155 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034156 for (uint32_t n = 1; n <= 4; n++) {
34157 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034158 GemmMicrokernelTester()
34159 .mr(2)
34160 .nr(4)
34161 .kr(8)
34162 .sr(1)
34163 .m(m)
34164 .n(n)
34165 .k(k)
34166 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034167 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034168 }
34169 }
34170 }
34171 }
34172
34173 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
34174 for (uint32_t n = 5; n < 8; n++) {
34175 for (size_t k = 1; k <= 40; k += 9) {
34176 GemmMicrokernelTester()
34177 .mr(2)
34178 .nr(4)
34179 .kr(8)
34180 .sr(1)
34181 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034182 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034183 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034184 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034185 }
34186 }
34187 }
34188
34189 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
34190 for (uint32_t n = 5; n < 8; n++) {
34191 for (size_t k = 1; k <= 40; k += 9) {
34192 GemmMicrokernelTester()
34193 .mr(2)
34194 .nr(4)
34195 .kr(8)
34196 .sr(1)
34197 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034198 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034199 .k(k)
34200 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080034201 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034202 }
34203 }
34204 }
34205
34206 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
34207 for (uint32_t n = 5; n < 8; n++) {
34208 for (size_t k = 1; k <= 40; k += 9) {
34209 for (uint32_t m = 1; m <= 2; m++) {
34210 GemmMicrokernelTester()
34211 .mr(2)
34212 .nr(4)
34213 .kr(8)
34214 .sr(1)
34215 .m(m)
34216 .n(n)
34217 .k(k)
34218 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034219 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034220 }
34221 }
34222 }
34223 }
34224
34225 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
34226 for (uint32_t n = 8; n <= 12; n += 4) {
34227 for (size_t k = 1; k <= 40; k += 9) {
34228 GemmMicrokernelTester()
34229 .mr(2)
34230 .nr(4)
34231 .kr(8)
34232 .sr(1)
34233 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034234 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034235 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034236 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034237 }
34238 }
34239 }
34240
34241 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
34242 for (uint32_t n = 8; n <= 12; n += 4) {
34243 for (size_t k = 1; k <= 40; k += 9) {
34244 GemmMicrokernelTester()
34245 .mr(2)
34246 .nr(4)
34247 .kr(8)
34248 .sr(1)
34249 .m(2)
34250 .n(n)
34251 .k(k)
34252 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080034253 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034254 }
34255 }
34256 }
34257
34258 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
34259 for (uint32_t n = 8; n <= 12; n += 4) {
34260 for (size_t k = 1; k <= 40; k += 9) {
34261 for (uint32_t m = 1; m <= 2; m++) {
34262 GemmMicrokernelTester()
34263 .mr(2)
34264 .nr(4)
34265 .kr(8)
34266 .sr(1)
34267 .m(m)
34268 .n(n)
34269 .k(k)
34270 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034271 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034272 }
34273 }
34274 }
34275 }
34276
34277 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, small_kernel) {
34278 for (size_t k = 1; k <= 40; k += 9) {
34279 GemmMicrokernelTester()
34280 .mr(2)
34281 .nr(4)
34282 .kr(8)
34283 .sr(1)
34284 .m(2)
34285 .n(4)
34286 .k(k)
34287 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080034288 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034289 }
34290 }
34291
34292 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
34293 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034294 for (uint32_t n = 1; n <= 4; n++) {
34295 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034296 GemmMicrokernelTester()
34297 .mr(2)
34298 .nr(4)
34299 .kr(8)
34300 .sr(1)
34301 .m(m)
34302 .n(n)
34303 .k(k)
34304 .ks(3)
34305 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034306 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034307 }
34308 }
34309 }
34310 }
34311
34312 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
34313 for (uint32_t n = 5; n < 8; n++) {
34314 for (size_t k = 1; k <= 40; k += 9) {
34315 GemmMicrokernelTester()
34316 .mr(2)
34317 .nr(4)
34318 .kr(8)
34319 .sr(1)
34320 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034321 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034322 .k(k)
34323 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080034324 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034325 }
34326 }
34327 }
34328
34329 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
34330 for (uint32_t n = 8; n <= 12; n += 4) {
34331 for (size_t k = 1; k <= 40; k += 9) {
34332 GemmMicrokernelTester()
34333 .mr(2)
34334 .nr(4)
34335 .kr(8)
34336 .sr(1)
34337 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034338 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034339 .k(k)
34340 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080034341 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034342 }
34343 }
34344 }
34345
34346 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
34347 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034348 for (uint32_t n = 1; n <= 4; n++) {
34349 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034350 GemmMicrokernelTester()
34351 .mr(2)
34352 .nr(4)
34353 .kr(8)
34354 .sr(1)
34355 .m(m)
34356 .n(n)
34357 .k(k)
34358 .cm_stride(7)
34359 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034360 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034361 }
34362 }
34363 }
34364 }
34365
34366 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, a_offset) {
34367 for (size_t k = 1; k <= 40; k += 9) {
34368 GemmMicrokernelTester()
34369 .mr(2)
34370 .nr(4)
34371 .kr(8)
34372 .sr(1)
34373 .m(2)
34374 .n(4)
34375 .k(k)
34376 .ks(3)
34377 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080034378 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034379 }
34380 }
34381
34382 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034383 for (size_t k = 1; k <= 40; k += 9) {
34384 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034385 GemmMicrokernelTester()
34386 .mr(2)
34387 .nr(4)
34388 .kr(8)
34389 .sr(1)
34390 .m(2)
34391 .n(4)
34392 .k(k)
34393 .ks(3)
34394 .a_offset(83)
34395 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080034396 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034397 }
34398 }
34399 }
34400
34401 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
34402 GemmMicrokernelTester()
34403 .mr(2)
34404 .nr(4)
34405 .kr(8)
34406 .sr(1)
34407 .m(2)
34408 .n(4)
34409 .k(8)
34410 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080034411 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034412 }
34413
34414 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
34415 GemmMicrokernelTester()
34416 .mr(2)
34417 .nr(4)
34418 .kr(8)
34419 .sr(1)
34420 .m(2)
34421 .n(4)
34422 .k(8)
34423 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080034424 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034425 }
34426
34427 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
34428 GemmMicrokernelTester()
34429 .mr(2)
34430 .nr(4)
34431 .kr(8)
34432 .sr(1)
34433 .m(2)
34434 .n(4)
34435 .k(8)
34436 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080034437 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034438 }
Marat Dukhan4c617792021-12-21 15:47:58 -080034439#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034440
34441
Marat Dukhan4c617792021-12-21 15:47:58 -080034442#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034443 TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
34444 GemmMicrokernelTester()
34445 .mr(4)
34446 .nr(4)
34447 .kr(8)
34448 .sr(1)
34449 .m(4)
34450 .n(4)
34451 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080034452 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034453 }
34454
34455 TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
34456 GemmMicrokernelTester()
34457 .mr(4)
34458 .nr(4)
34459 .kr(8)
34460 .sr(1)
34461 .m(4)
34462 .n(4)
34463 .k(8)
34464 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080034465 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034466 }
34467
34468 TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034469 for (uint32_t n = 1; n <= 4; n++) {
34470 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034471 GemmMicrokernelTester()
34472 .mr(4)
34473 .nr(4)
34474 .kr(8)
34475 .sr(1)
34476 .m(m)
34477 .n(n)
34478 .k(8)
34479 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034480 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034481 }
34482 }
34483 }
34484
34485 TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
34486 for (uint32_t m = 1; m <= 4; m++) {
34487 GemmMicrokernelTester()
34488 .mr(4)
34489 .nr(4)
34490 .kr(8)
34491 .sr(1)
34492 .m(m)
34493 .n(4)
34494 .k(8)
34495 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034496 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034497 }
34498 }
34499
34500 TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
34501 for (uint32_t n = 1; n <= 4; n++) {
34502 GemmMicrokernelTester()
34503 .mr(4)
34504 .nr(4)
34505 .kr(8)
34506 .sr(1)
34507 .m(4)
34508 .n(n)
34509 .k(8)
34510 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034511 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034512 }
34513 }
34514
34515 TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
34516 for (size_t k = 1; k < 8; k++) {
34517 GemmMicrokernelTester()
34518 .mr(4)
34519 .nr(4)
34520 .kr(8)
34521 .sr(1)
34522 .m(4)
34523 .n(4)
34524 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034525 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034526 }
34527 }
34528
34529 TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
34530 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034531 for (uint32_t n = 1; n <= 4; n++) {
34532 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034533 GemmMicrokernelTester()
34534 .mr(4)
34535 .nr(4)
34536 .kr(8)
34537 .sr(1)
34538 .m(m)
34539 .n(n)
34540 .k(k)
34541 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034542 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034543 }
34544 }
34545 }
34546 }
34547
34548 TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
34549 for (size_t k = 9; k < 16; k++) {
34550 GemmMicrokernelTester()
34551 .mr(4)
34552 .nr(4)
34553 .kr(8)
34554 .sr(1)
34555 .m(4)
34556 .n(4)
34557 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034558 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034559 }
34560 }
34561
34562 TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
34563 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034564 for (uint32_t n = 1; n <= 4; n++) {
34565 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034566 GemmMicrokernelTester()
34567 .mr(4)
34568 .nr(4)
34569 .kr(8)
34570 .sr(1)
34571 .m(m)
34572 .n(n)
34573 .k(k)
34574 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034575 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034576 }
34577 }
34578 }
34579 }
34580
34581 TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
34582 for (size_t k = 16; k <= 80; k += 8) {
34583 GemmMicrokernelTester()
34584 .mr(4)
34585 .nr(4)
34586 .kr(8)
34587 .sr(1)
34588 .m(4)
34589 .n(4)
34590 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034591 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034592 }
34593 }
34594
34595 TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
34596 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034597 for (uint32_t n = 1; n <= 4; n++) {
34598 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034599 GemmMicrokernelTester()
34600 .mr(4)
34601 .nr(4)
34602 .kr(8)
34603 .sr(1)
34604 .m(m)
34605 .n(n)
34606 .k(k)
34607 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034608 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034609 }
34610 }
34611 }
34612 }
34613
34614 TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
34615 for (uint32_t n = 5; n < 8; n++) {
34616 for (size_t k = 1; k <= 40; k += 9) {
34617 GemmMicrokernelTester()
34618 .mr(4)
34619 .nr(4)
34620 .kr(8)
34621 .sr(1)
34622 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034623 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034624 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034625 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034626 }
34627 }
34628 }
34629
34630 TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
34631 for (uint32_t n = 5; n < 8; n++) {
34632 for (size_t k = 1; k <= 40; k += 9) {
34633 GemmMicrokernelTester()
34634 .mr(4)
34635 .nr(4)
34636 .kr(8)
34637 .sr(1)
34638 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034639 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034640 .k(k)
34641 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080034642 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034643 }
34644 }
34645 }
34646
34647 TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
34648 for (uint32_t n = 5; n < 8; n++) {
34649 for (size_t k = 1; k <= 40; k += 9) {
34650 for (uint32_t m = 1; m <= 4; m++) {
34651 GemmMicrokernelTester()
34652 .mr(4)
34653 .nr(4)
34654 .kr(8)
34655 .sr(1)
34656 .m(m)
34657 .n(n)
34658 .k(k)
34659 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034660 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034661 }
34662 }
34663 }
34664 }
34665
34666 TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
34667 for (uint32_t n = 8; n <= 12; n += 4) {
34668 for (size_t k = 1; k <= 40; k += 9) {
34669 GemmMicrokernelTester()
34670 .mr(4)
34671 .nr(4)
34672 .kr(8)
34673 .sr(1)
34674 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034675 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034676 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034677 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034678 }
34679 }
34680 }
34681
34682 TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
34683 for (uint32_t n = 8; n <= 12; n += 4) {
34684 for (size_t k = 1; k <= 40; k += 9) {
34685 GemmMicrokernelTester()
34686 .mr(4)
34687 .nr(4)
34688 .kr(8)
34689 .sr(1)
34690 .m(4)
34691 .n(n)
34692 .k(k)
34693 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080034694 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034695 }
34696 }
34697 }
34698
34699 TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
34700 for (uint32_t n = 8; n <= 12; n += 4) {
34701 for (size_t k = 1; k <= 40; k += 9) {
34702 for (uint32_t m = 1; m <= 4; m++) {
34703 GemmMicrokernelTester()
34704 .mr(4)
34705 .nr(4)
34706 .kr(8)
34707 .sr(1)
34708 .m(m)
34709 .n(n)
34710 .k(k)
34711 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034712 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034713 }
34714 }
34715 }
34716 }
34717
34718 TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, small_kernel) {
34719 for (size_t k = 1; k <= 40; k += 9) {
34720 GemmMicrokernelTester()
34721 .mr(4)
34722 .nr(4)
34723 .kr(8)
34724 .sr(1)
34725 .m(4)
34726 .n(4)
34727 .k(k)
34728 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080034729 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034730 }
34731 }
34732
34733 TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
34734 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034735 for (uint32_t n = 1; n <= 4; n++) {
34736 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034737 GemmMicrokernelTester()
34738 .mr(4)
34739 .nr(4)
34740 .kr(8)
34741 .sr(1)
34742 .m(m)
34743 .n(n)
34744 .k(k)
34745 .ks(3)
34746 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034747 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034748 }
34749 }
34750 }
34751 }
34752
34753 TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
34754 for (uint32_t n = 5; n < 8; n++) {
34755 for (size_t k = 1; k <= 40; k += 9) {
34756 GemmMicrokernelTester()
34757 .mr(4)
34758 .nr(4)
34759 .kr(8)
34760 .sr(1)
34761 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034762 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034763 .k(k)
34764 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080034765 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034766 }
34767 }
34768 }
34769
34770 TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
34771 for (uint32_t n = 8; n <= 12; n += 4) {
34772 for (size_t k = 1; k <= 40; k += 9) {
34773 GemmMicrokernelTester()
34774 .mr(4)
34775 .nr(4)
34776 .kr(8)
34777 .sr(1)
34778 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034779 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034780 .k(k)
34781 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080034782 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034783 }
34784 }
34785 }
34786
34787 TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
34788 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034789 for (uint32_t n = 1; n <= 4; n++) {
34790 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034791 GemmMicrokernelTester()
34792 .mr(4)
34793 .nr(4)
34794 .kr(8)
34795 .sr(1)
34796 .m(m)
34797 .n(n)
34798 .k(k)
34799 .cm_stride(7)
34800 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034801 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034802 }
34803 }
34804 }
34805 }
34806
34807 TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, a_offset) {
34808 for (size_t k = 1; k <= 40; k += 9) {
34809 GemmMicrokernelTester()
34810 .mr(4)
34811 .nr(4)
34812 .kr(8)
34813 .sr(1)
34814 .m(4)
34815 .n(4)
34816 .k(k)
34817 .ks(3)
34818 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080034819 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034820 }
34821 }
34822
34823 TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034824 for (size_t k = 1; k <= 40; k += 9) {
34825 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034826 GemmMicrokernelTester()
34827 .mr(4)
34828 .nr(4)
34829 .kr(8)
34830 .sr(1)
34831 .m(4)
34832 .n(4)
34833 .k(k)
34834 .ks(3)
34835 .a_offset(163)
34836 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080034837 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034838 }
34839 }
34840 }
34841
34842 TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
34843 GemmMicrokernelTester()
34844 .mr(4)
34845 .nr(4)
34846 .kr(8)
34847 .sr(1)
34848 .m(4)
34849 .n(4)
34850 .k(8)
34851 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080034852 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034853 }
34854
34855 TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
34856 GemmMicrokernelTester()
34857 .mr(4)
34858 .nr(4)
34859 .kr(8)
34860 .sr(1)
34861 .m(4)
34862 .n(4)
34863 .k(8)
34864 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080034865 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034866 }
34867
34868 TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
34869 GemmMicrokernelTester()
34870 .mr(4)
34871 .nr(4)
34872 .kr(8)
34873 .sr(1)
34874 .m(4)
34875 .n(4)
34876 .k(8)
34877 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080034878 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034879 }
Marat Dukhan4c617792021-12-21 15:47:58 -080034880#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034881
34882
Marat Dukhan4c617792021-12-21 15:47:58 -080034883#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034884 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
34885 GemmMicrokernelTester()
34886 .mr(1)
34887 .nr(4)
34888 .kr(8)
34889 .sr(1)
34890 .m(1)
34891 .n(4)
34892 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080034893 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034894 }
34895
34896 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
34897 GemmMicrokernelTester()
34898 .mr(1)
34899 .nr(4)
34900 .kr(8)
34901 .sr(1)
34902 .m(1)
34903 .n(4)
34904 .k(8)
34905 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080034906 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034907 }
34908
34909 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034910 for (uint32_t n = 1; n <= 4; n++) {
34911 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034912 GemmMicrokernelTester()
34913 .mr(1)
34914 .nr(4)
34915 .kr(8)
34916 .sr(1)
34917 .m(m)
34918 .n(n)
34919 .k(8)
34920 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034921 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034922 }
34923 }
34924 }
34925
34926 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
34927 for (uint32_t m = 1; m <= 1; m++) {
34928 GemmMicrokernelTester()
34929 .mr(1)
34930 .nr(4)
34931 .kr(8)
34932 .sr(1)
34933 .m(m)
34934 .n(4)
34935 .k(8)
34936 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034937 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034938 }
34939 }
34940
34941 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
34942 for (uint32_t n = 1; n <= 4; n++) {
34943 GemmMicrokernelTester()
34944 .mr(1)
34945 .nr(4)
34946 .kr(8)
34947 .sr(1)
34948 .m(1)
34949 .n(n)
34950 .k(8)
34951 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034952 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034953 }
34954 }
34955
34956 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
34957 for (size_t k = 1; k < 8; k++) {
34958 GemmMicrokernelTester()
34959 .mr(1)
34960 .nr(4)
34961 .kr(8)
34962 .sr(1)
34963 .m(1)
34964 .n(4)
34965 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034966 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034967 }
34968 }
34969
34970 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
34971 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034972 for (uint32_t n = 1; n <= 4; n++) {
34973 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034974 GemmMicrokernelTester()
34975 .mr(1)
34976 .nr(4)
34977 .kr(8)
34978 .sr(1)
34979 .m(m)
34980 .n(n)
34981 .k(k)
34982 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034983 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070034984 }
34985 }
34986 }
34987 }
34988
34989 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
34990 for (size_t k = 9; k < 16; k++) {
34991 GemmMicrokernelTester()
34992 .mr(1)
34993 .nr(4)
34994 .kr(8)
34995 .sr(1)
34996 .m(1)
34997 .n(4)
34998 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034999 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035000 }
35001 }
35002
35003 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
35004 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035005 for (uint32_t n = 1; n <= 4; n++) {
35006 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035007 GemmMicrokernelTester()
35008 .mr(1)
35009 .nr(4)
35010 .kr(8)
35011 .sr(1)
35012 .m(m)
35013 .n(n)
35014 .k(k)
35015 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035016 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035017 }
35018 }
35019 }
35020 }
35021
35022 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
35023 for (size_t k = 16; k <= 80; k += 8) {
35024 GemmMicrokernelTester()
35025 .mr(1)
35026 .nr(4)
35027 .kr(8)
35028 .sr(1)
35029 .m(1)
35030 .n(4)
35031 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080035032 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035033 }
35034 }
35035
35036 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
35037 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035038 for (uint32_t n = 1; n <= 4; n++) {
35039 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035040 GemmMicrokernelTester()
35041 .mr(1)
35042 .nr(4)
35043 .kr(8)
35044 .sr(1)
35045 .m(m)
35046 .n(n)
35047 .k(k)
35048 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035049 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035050 }
35051 }
35052 }
35053 }
35054
35055 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
35056 for (uint32_t n = 5; n < 8; n++) {
35057 for (size_t k = 1; k <= 40; k += 9) {
35058 GemmMicrokernelTester()
35059 .mr(1)
35060 .nr(4)
35061 .kr(8)
35062 .sr(1)
35063 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080035064 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035065 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080035066 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035067 }
35068 }
35069 }
35070
35071 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
35072 for (uint32_t n = 5; n < 8; n++) {
35073 for (size_t k = 1; k <= 40; k += 9) {
35074 GemmMicrokernelTester()
35075 .mr(1)
35076 .nr(4)
35077 .kr(8)
35078 .sr(1)
35079 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080035080 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035081 .k(k)
35082 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080035083 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035084 }
35085 }
35086 }
35087
35088 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
35089 for (uint32_t n = 5; n < 8; n++) {
35090 for (size_t k = 1; k <= 40; k += 9) {
35091 for (uint32_t m = 1; m <= 1; m++) {
35092 GemmMicrokernelTester()
35093 .mr(1)
35094 .nr(4)
35095 .kr(8)
35096 .sr(1)
35097 .m(m)
35098 .n(n)
35099 .k(k)
35100 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035101 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035102 }
35103 }
35104 }
35105 }
35106
35107 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
35108 for (uint32_t n = 8; n <= 12; n += 4) {
35109 for (size_t k = 1; k <= 40; k += 9) {
35110 GemmMicrokernelTester()
35111 .mr(1)
35112 .nr(4)
35113 .kr(8)
35114 .sr(1)
35115 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080035116 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035117 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080035118 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035119 }
35120 }
35121 }
35122
35123 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
35124 for (uint32_t n = 8; n <= 12; n += 4) {
35125 for (size_t k = 1; k <= 40; k += 9) {
35126 GemmMicrokernelTester()
35127 .mr(1)
35128 .nr(4)
35129 .kr(8)
35130 .sr(1)
35131 .m(1)
35132 .n(n)
35133 .k(k)
35134 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080035135 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035136 }
35137 }
35138 }
35139
35140 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
35141 for (uint32_t n = 8; n <= 12; n += 4) {
35142 for (size_t k = 1; k <= 40; k += 9) {
35143 for (uint32_t m = 1; m <= 1; m++) {
35144 GemmMicrokernelTester()
35145 .mr(1)
35146 .nr(4)
35147 .kr(8)
35148 .sr(1)
35149 .m(m)
35150 .n(n)
35151 .k(k)
35152 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035153 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035154 }
35155 }
35156 }
35157 }
35158
35159 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, small_kernel) {
35160 for (size_t k = 1; k <= 40; k += 9) {
35161 GemmMicrokernelTester()
35162 .mr(1)
35163 .nr(4)
35164 .kr(8)
35165 .sr(1)
35166 .m(1)
35167 .n(4)
35168 .k(k)
35169 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080035170 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035171 }
35172 }
35173
35174 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
35175 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035176 for (uint32_t n = 1; n <= 4; n++) {
35177 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035178 GemmMicrokernelTester()
35179 .mr(1)
35180 .nr(4)
35181 .kr(8)
35182 .sr(1)
35183 .m(m)
35184 .n(n)
35185 .k(k)
35186 .ks(3)
35187 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035188 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035189 }
35190 }
35191 }
35192 }
35193
35194 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
35195 for (uint32_t n = 5; n < 8; n++) {
35196 for (size_t k = 1; k <= 40; k += 9) {
35197 GemmMicrokernelTester()
35198 .mr(1)
35199 .nr(4)
35200 .kr(8)
35201 .sr(1)
35202 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080035203 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035204 .k(k)
35205 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080035206 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035207 }
35208 }
35209 }
35210
35211 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
35212 for (uint32_t n = 8; n <= 12; n += 4) {
35213 for (size_t k = 1; k <= 40; k += 9) {
35214 GemmMicrokernelTester()
35215 .mr(1)
35216 .nr(4)
35217 .kr(8)
35218 .sr(1)
35219 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080035220 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035221 .k(k)
35222 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080035223 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035224 }
35225 }
35226 }
35227
35228 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
35229 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035230 for (uint32_t n = 1; n <= 4; n++) {
35231 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035232 GemmMicrokernelTester()
35233 .mr(1)
35234 .nr(4)
35235 .kr(8)
35236 .sr(1)
35237 .m(m)
35238 .n(n)
35239 .k(k)
35240 .cm_stride(7)
35241 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035242 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035243 }
35244 }
35245 }
35246 }
35247
35248 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, a_offset) {
35249 for (size_t k = 1; k <= 40; k += 9) {
35250 GemmMicrokernelTester()
35251 .mr(1)
35252 .nr(4)
35253 .kr(8)
35254 .sr(1)
35255 .m(1)
35256 .n(4)
35257 .k(k)
35258 .ks(3)
35259 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080035260 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035261 }
35262 }
35263
35264 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035265 for (size_t k = 1; k <= 40; k += 9) {
35266 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035267 GemmMicrokernelTester()
35268 .mr(1)
35269 .nr(4)
35270 .kr(8)
35271 .sr(1)
35272 .m(1)
35273 .n(4)
35274 .k(k)
35275 .ks(3)
35276 .a_offset(43)
35277 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080035278 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035279 }
35280 }
35281 }
35282
35283 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
35284 GemmMicrokernelTester()
35285 .mr(1)
35286 .nr(4)
35287 .kr(8)
35288 .sr(1)
35289 .m(1)
35290 .n(4)
35291 .k(8)
35292 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080035293 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035294 }
35295
35296 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
35297 GemmMicrokernelTester()
35298 .mr(1)
35299 .nr(4)
35300 .kr(8)
35301 .sr(1)
35302 .m(1)
35303 .n(4)
35304 .k(8)
35305 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080035306 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035307 }
35308
35309 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
35310 GemmMicrokernelTester()
35311 .mr(1)
35312 .nr(4)
35313 .kr(8)
35314 .sr(1)
35315 .m(1)
35316 .n(4)
35317 .k(8)
35318 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080035319 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035320 }
Marat Dukhan4c617792021-12-21 15:47:58 -080035321#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035322
35323
Marat Dukhan4c617792021-12-21 15:47:58 -080035324#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035325 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
35326 GemmMicrokernelTester()
35327 .mr(2)
35328 .nr(4)
35329 .kr(8)
35330 .sr(1)
35331 .m(2)
35332 .n(4)
35333 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080035334 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035335 }
35336
35337 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
35338 GemmMicrokernelTester()
35339 .mr(2)
35340 .nr(4)
35341 .kr(8)
35342 .sr(1)
35343 .m(2)
35344 .n(4)
35345 .k(8)
35346 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080035347 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035348 }
35349
35350 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035351 for (uint32_t n = 1; n <= 4; n++) {
35352 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035353 GemmMicrokernelTester()
35354 .mr(2)
35355 .nr(4)
35356 .kr(8)
35357 .sr(1)
35358 .m(m)
35359 .n(n)
35360 .k(8)
35361 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035362 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035363 }
35364 }
35365 }
35366
35367 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
35368 for (uint32_t m = 1; m <= 2; m++) {
35369 GemmMicrokernelTester()
35370 .mr(2)
35371 .nr(4)
35372 .kr(8)
35373 .sr(1)
35374 .m(m)
35375 .n(4)
35376 .k(8)
35377 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035378 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035379 }
35380 }
35381
35382 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
35383 for (uint32_t n = 1; n <= 4; n++) {
35384 GemmMicrokernelTester()
35385 .mr(2)
35386 .nr(4)
35387 .kr(8)
35388 .sr(1)
35389 .m(2)
35390 .n(n)
35391 .k(8)
35392 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035393 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035394 }
35395 }
35396
35397 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
35398 for (size_t k = 1; k < 8; k++) {
35399 GemmMicrokernelTester()
35400 .mr(2)
35401 .nr(4)
35402 .kr(8)
35403 .sr(1)
35404 .m(2)
35405 .n(4)
35406 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080035407 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035408 }
35409 }
35410
35411 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
35412 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035413 for (uint32_t n = 1; n <= 4; n++) {
35414 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035415 GemmMicrokernelTester()
35416 .mr(2)
35417 .nr(4)
35418 .kr(8)
35419 .sr(1)
35420 .m(m)
35421 .n(n)
35422 .k(k)
35423 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035424 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035425 }
35426 }
35427 }
35428 }
35429
35430 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
35431 for (size_t k = 9; k < 16; k++) {
35432 GemmMicrokernelTester()
35433 .mr(2)
35434 .nr(4)
35435 .kr(8)
35436 .sr(1)
35437 .m(2)
35438 .n(4)
35439 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080035440 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035441 }
35442 }
35443
35444 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
35445 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035446 for (uint32_t n = 1; n <= 4; n++) {
35447 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035448 GemmMicrokernelTester()
35449 .mr(2)
35450 .nr(4)
35451 .kr(8)
35452 .sr(1)
35453 .m(m)
35454 .n(n)
35455 .k(k)
35456 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035457 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035458 }
35459 }
35460 }
35461 }
35462
35463 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
35464 for (size_t k = 16; k <= 80; k += 8) {
35465 GemmMicrokernelTester()
35466 .mr(2)
35467 .nr(4)
35468 .kr(8)
35469 .sr(1)
35470 .m(2)
35471 .n(4)
35472 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080035473 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035474 }
35475 }
35476
35477 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
35478 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035479 for (uint32_t n = 1; n <= 4; n++) {
35480 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035481 GemmMicrokernelTester()
35482 .mr(2)
35483 .nr(4)
35484 .kr(8)
35485 .sr(1)
35486 .m(m)
35487 .n(n)
35488 .k(k)
35489 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035490 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035491 }
35492 }
35493 }
35494 }
35495
35496 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
35497 for (uint32_t n = 5; n < 8; n++) {
35498 for (size_t k = 1; k <= 40; k += 9) {
35499 GemmMicrokernelTester()
35500 .mr(2)
35501 .nr(4)
35502 .kr(8)
35503 .sr(1)
35504 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080035505 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035506 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080035507 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035508 }
35509 }
35510 }
35511
35512 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
35513 for (uint32_t n = 5; n < 8; n++) {
35514 for (size_t k = 1; k <= 40; k += 9) {
35515 GemmMicrokernelTester()
35516 .mr(2)
35517 .nr(4)
35518 .kr(8)
35519 .sr(1)
35520 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080035521 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035522 .k(k)
35523 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080035524 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035525 }
35526 }
35527 }
35528
35529 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
35530 for (uint32_t n = 5; n < 8; n++) {
35531 for (size_t k = 1; k <= 40; k += 9) {
35532 for (uint32_t m = 1; m <= 2; m++) {
35533 GemmMicrokernelTester()
35534 .mr(2)
35535 .nr(4)
35536 .kr(8)
35537 .sr(1)
35538 .m(m)
35539 .n(n)
35540 .k(k)
35541 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035542 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035543 }
35544 }
35545 }
35546 }
35547
35548 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
35549 for (uint32_t n = 8; n <= 12; n += 4) {
35550 for (size_t k = 1; k <= 40; k += 9) {
35551 GemmMicrokernelTester()
35552 .mr(2)
35553 .nr(4)
35554 .kr(8)
35555 .sr(1)
35556 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080035557 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035558 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080035559 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035560 }
35561 }
35562 }
35563
35564 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
35565 for (uint32_t n = 8; n <= 12; n += 4) {
35566 for (size_t k = 1; k <= 40; k += 9) {
35567 GemmMicrokernelTester()
35568 .mr(2)
35569 .nr(4)
35570 .kr(8)
35571 .sr(1)
35572 .m(2)
35573 .n(n)
35574 .k(k)
35575 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080035576 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035577 }
35578 }
35579 }
35580
35581 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
35582 for (uint32_t n = 8; n <= 12; n += 4) {
35583 for (size_t k = 1; k <= 40; k += 9) {
35584 for (uint32_t m = 1; m <= 2; m++) {
35585 GemmMicrokernelTester()
35586 .mr(2)
35587 .nr(4)
35588 .kr(8)
35589 .sr(1)
35590 .m(m)
35591 .n(n)
35592 .k(k)
35593 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035594 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035595 }
35596 }
35597 }
35598 }
35599
35600 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, small_kernel) {
35601 for (size_t k = 1; k <= 40; k += 9) {
35602 GemmMicrokernelTester()
35603 .mr(2)
35604 .nr(4)
35605 .kr(8)
35606 .sr(1)
35607 .m(2)
35608 .n(4)
35609 .k(k)
35610 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080035611 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035612 }
35613 }
35614
35615 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
35616 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035617 for (uint32_t n = 1; n <= 4; n++) {
35618 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035619 GemmMicrokernelTester()
35620 .mr(2)
35621 .nr(4)
35622 .kr(8)
35623 .sr(1)
35624 .m(m)
35625 .n(n)
35626 .k(k)
35627 .ks(3)
35628 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035629 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035630 }
35631 }
35632 }
35633 }
35634
35635 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
35636 for (uint32_t n = 5; n < 8; n++) {
35637 for (size_t k = 1; k <= 40; k += 9) {
35638 GemmMicrokernelTester()
35639 .mr(2)
35640 .nr(4)
35641 .kr(8)
35642 .sr(1)
35643 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080035644 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035645 .k(k)
35646 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080035647 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035648 }
35649 }
35650 }
35651
35652 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
35653 for (uint32_t n = 8; n <= 12; n += 4) {
35654 for (size_t k = 1; k <= 40; k += 9) {
35655 GemmMicrokernelTester()
35656 .mr(2)
35657 .nr(4)
35658 .kr(8)
35659 .sr(1)
35660 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080035661 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035662 .k(k)
35663 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080035664 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035665 }
35666 }
35667 }
35668
35669 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
35670 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035671 for (uint32_t n = 1; n <= 4; n++) {
35672 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035673 GemmMicrokernelTester()
35674 .mr(2)
35675 .nr(4)
35676 .kr(8)
35677 .sr(1)
35678 .m(m)
35679 .n(n)
35680 .k(k)
35681 .cm_stride(7)
35682 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035683 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035684 }
35685 }
35686 }
35687 }
35688
35689 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, a_offset) {
35690 for (size_t k = 1; k <= 40; k += 9) {
35691 GemmMicrokernelTester()
35692 .mr(2)
35693 .nr(4)
35694 .kr(8)
35695 .sr(1)
35696 .m(2)
35697 .n(4)
35698 .k(k)
35699 .ks(3)
35700 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080035701 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035702 }
35703 }
35704
35705 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035706 for (size_t k = 1; k <= 40; k += 9) {
35707 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035708 GemmMicrokernelTester()
35709 .mr(2)
35710 .nr(4)
35711 .kr(8)
35712 .sr(1)
35713 .m(2)
35714 .n(4)
35715 .k(k)
35716 .ks(3)
35717 .a_offset(83)
35718 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080035719 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035720 }
35721 }
35722 }
35723
35724 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
35725 GemmMicrokernelTester()
35726 .mr(2)
35727 .nr(4)
35728 .kr(8)
35729 .sr(1)
35730 .m(2)
35731 .n(4)
35732 .k(8)
35733 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080035734 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035735 }
35736
35737 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
35738 GemmMicrokernelTester()
35739 .mr(2)
35740 .nr(4)
35741 .kr(8)
35742 .sr(1)
35743 .m(2)
35744 .n(4)
35745 .k(8)
35746 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080035747 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035748 }
35749
35750 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
35751 GemmMicrokernelTester()
35752 .mr(2)
35753 .nr(4)
35754 .kr(8)
35755 .sr(1)
35756 .m(2)
35757 .n(4)
35758 .k(8)
35759 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080035760 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035761 }
Marat Dukhan4c617792021-12-21 15:47:58 -080035762#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035763
35764
Marat Dukhan4c617792021-12-21 15:47:58 -080035765#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035766 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
35767 GemmMicrokernelTester()
35768 .mr(3)
35769 .nr(4)
35770 .kr(8)
35771 .sr(1)
35772 .m(3)
35773 .n(4)
35774 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080035775 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035776 }
35777
35778 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
35779 GemmMicrokernelTester()
35780 .mr(3)
35781 .nr(4)
35782 .kr(8)
35783 .sr(1)
35784 .m(3)
35785 .n(4)
35786 .k(8)
35787 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080035788 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035789 }
35790
35791 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035792 for (uint32_t n = 1; n <= 4; n++) {
35793 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035794 GemmMicrokernelTester()
35795 .mr(3)
35796 .nr(4)
35797 .kr(8)
35798 .sr(1)
35799 .m(m)
35800 .n(n)
35801 .k(8)
35802 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035803 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035804 }
35805 }
35806 }
35807
35808 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
35809 for (uint32_t m = 1; m <= 3; m++) {
35810 GemmMicrokernelTester()
35811 .mr(3)
35812 .nr(4)
35813 .kr(8)
35814 .sr(1)
35815 .m(m)
35816 .n(4)
35817 .k(8)
35818 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035819 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035820 }
35821 }
35822
35823 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
35824 for (uint32_t n = 1; n <= 4; n++) {
35825 GemmMicrokernelTester()
35826 .mr(3)
35827 .nr(4)
35828 .kr(8)
35829 .sr(1)
35830 .m(3)
35831 .n(n)
35832 .k(8)
35833 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035834 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035835 }
35836 }
35837
35838 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
35839 for (size_t k = 1; k < 8; k++) {
35840 GemmMicrokernelTester()
35841 .mr(3)
35842 .nr(4)
35843 .kr(8)
35844 .sr(1)
35845 .m(3)
35846 .n(4)
35847 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080035848 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035849 }
35850 }
35851
35852 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
35853 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035854 for (uint32_t n = 1; n <= 4; n++) {
35855 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035856 GemmMicrokernelTester()
35857 .mr(3)
35858 .nr(4)
35859 .kr(8)
35860 .sr(1)
35861 .m(m)
35862 .n(n)
35863 .k(k)
35864 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035865 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035866 }
35867 }
35868 }
35869 }
35870
35871 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
35872 for (size_t k = 9; k < 16; k++) {
35873 GemmMicrokernelTester()
35874 .mr(3)
35875 .nr(4)
35876 .kr(8)
35877 .sr(1)
35878 .m(3)
35879 .n(4)
35880 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080035881 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035882 }
35883 }
35884
35885 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
35886 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035887 for (uint32_t n = 1; n <= 4; n++) {
35888 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035889 GemmMicrokernelTester()
35890 .mr(3)
35891 .nr(4)
35892 .kr(8)
35893 .sr(1)
35894 .m(m)
35895 .n(n)
35896 .k(k)
35897 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035898 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035899 }
35900 }
35901 }
35902 }
35903
35904 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
35905 for (size_t k = 16; k <= 80; k += 8) {
35906 GemmMicrokernelTester()
35907 .mr(3)
35908 .nr(4)
35909 .kr(8)
35910 .sr(1)
35911 .m(3)
35912 .n(4)
35913 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080035914 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035915 }
35916 }
35917
35918 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
35919 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035920 for (uint32_t n = 1; n <= 4; n++) {
35921 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035922 GemmMicrokernelTester()
35923 .mr(3)
35924 .nr(4)
35925 .kr(8)
35926 .sr(1)
35927 .m(m)
35928 .n(n)
35929 .k(k)
35930 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035931 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035932 }
35933 }
35934 }
35935 }
35936
35937 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
35938 for (uint32_t n = 5; n < 8; n++) {
35939 for (size_t k = 1; k <= 40; k += 9) {
35940 GemmMicrokernelTester()
35941 .mr(3)
35942 .nr(4)
35943 .kr(8)
35944 .sr(1)
35945 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080035946 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035947 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080035948 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035949 }
35950 }
35951 }
35952
35953 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
35954 for (uint32_t n = 5; n < 8; n++) {
35955 for (size_t k = 1; k <= 40; k += 9) {
35956 GemmMicrokernelTester()
35957 .mr(3)
35958 .nr(4)
35959 .kr(8)
35960 .sr(1)
35961 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080035962 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035963 .k(k)
35964 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080035965 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035966 }
35967 }
35968 }
35969
35970 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
35971 for (uint32_t n = 5; n < 8; n++) {
35972 for (size_t k = 1; k <= 40; k += 9) {
35973 for (uint32_t m = 1; m <= 3; m++) {
35974 GemmMicrokernelTester()
35975 .mr(3)
35976 .nr(4)
35977 .kr(8)
35978 .sr(1)
35979 .m(m)
35980 .n(n)
35981 .k(k)
35982 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035983 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035984 }
35985 }
35986 }
35987 }
35988
35989 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
35990 for (uint32_t n = 8; n <= 12; n += 4) {
35991 for (size_t k = 1; k <= 40; k += 9) {
35992 GemmMicrokernelTester()
35993 .mr(3)
35994 .nr(4)
35995 .kr(8)
35996 .sr(1)
35997 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080035998 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070035999 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080036000 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070036001 }
36002 }
36003 }
36004
36005 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
36006 for (uint32_t n = 8; n <= 12; n += 4) {
36007 for (size_t k = 1; k <= 40; k += 9) {
36008 GemmMicrokernelTester()
36009 .mr(3)
36010 .nr(4)
36011 .kr(8)
36012 .sr(1)
36013 .m(3)
36014 .n(n)
36015 .k(k)
36016 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080036017 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070036018 }
36019 }
36020 }
36021
36022 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
36023 for (uint32_t n = 8; n <= 12; n += 4) {
36024 for (size_t k = 1; k <= 40; k += 9) {
36025 for (uint32_t m = 1; m <= 3; m++) {
36026 GemmMicrokernelTester()
36027 .mr(3)
36028 .nr(4)
36029 .kr(8)
36030 .sr(1)
36031 .m(m)
36032 .n(n)
36033 .k(k)
36034 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036035 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070036036 }
36037 }
36038 }
36039 }
36040
36041 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, small_kernel) {
36042 for (size_t k = 1; k <= 40; k += 9) {
36043 GemmMicrokernelTester()
36044 .mr(3)
36045 .nr(4)
36046 .kr(8)
36047 .sr(1)
36048 .m(3)
36049 .n(4)
36050 .k(k)
36051 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080036052 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070036053 }
36054 }
36055
36056 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
36057 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080036058 for (uint32_t n = 1; n <= 4; n++) {
36059 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070036060 GemmMicrokernelTester()
36061 .mr(3)
36062 .nr(4)
36063 .kr(8)
36064 .sr(1)
36065 .m(m)
36066 .n(n)
36067 .k(k)
36068 .ks(3)
36069 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036070 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070036071 }
36072 }
36073 }
36074 }
36075
36076 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
36077 for (uint32_t n = 5; n < 8; n++) {
36078 for (size_t k = 1; k <= 40; k += 9) {
36079 GemmMicrokernelTester()
36080 .mr(3)
36081 .nr(4)
36082 .kr(8)
36083 .sr(1)
36084 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080036085 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070036086 .k(k)
36087 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080036088 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070036089 }
36090 }
36091 }
36092
36093 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
36094 for (uint32_t n = 8; n <= 12; n += 4) {
36095 for (size_t k = 1; k <= 40; k += 9) {
36096 GemmMicrokernelTester()
36097 .mr(3)
36098 .nr(4)
36099 .kr(8)
36100 .sr(1)
36101 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080036102 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070036103 .k(k)
36104 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080036105 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070036106 }
36107 }
36108 }
36109
36110 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
36111 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080036112 for (uint32_t n = 1; n <= 4; n++) {
36113 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070036114 GemmMicrokernelTester()
36115 .mr(3)
36116 .nr(4)
36117 .kr(8)
36118 .sr(1)
36119 .m(m)
36120 .n(n)
36121 .k(k)
36122 .cm_stride(7)
36123 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036124 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070036125 }
36126 }
36127 }
36128 }
36129
36130 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, a_offset) {
36131 for (size_t k = 1; k <= 40; k += 9) {
36132 GemmMicrokernelTester()
36133 .mr(3)
36134 .nr(4)
36135 .kr(8)
36136 .sr(1)
36137 .m(3)
36138 .n(4)
36139 .k(k)
36140 .ks(3)
36141 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080036142 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070036143 }
36144 }
36145
36146 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080036147 for (size_t k = 1; k <= 40; k += 9) {
36148 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070036149 GemmMicrokernelTester()
36150 .mr(3)
36151 .nr(4)
36152 .kr(8)
36153 .sr(1)
36154 .m(3)
36155 .n(4)
36156 .k(k)
36157 .ks(3)
36158 .a_offset(127)
36159 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080036160 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070036161 }
36162 }
36163 }
36164
36165 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
36166 GemmMicrokernelTester()
36167 .mr(3)
36168 .nr(4)
36169 .kr(8)
36170 .sr(1)
36171 .m(3)
36172 .n(4)
36173 .k(8)
36174 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080036175 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070036176 }
36177
36178 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
36179 GemmMicrokernelTester()
36180 .mr(3)
36181 .nr(4)
36182 .kr(8)
36183 .sr(1)
36184 .m(3)
36185 .n(4)
36186 .k(8)
36187 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080036188 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070036189 }
36190
36191 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
36192 GemmMicrokernelTester()
36193 .mr(3)
36194 .nr(4)
36195 .kr(8)
36196 .sr(1)
36197 .m(3)
36198 .n(4)
36199 .k(8)
36200 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080036201 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070036202 }
Marat Dukhan4c617792021-12-21 15:47:58 -080036203#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070036204
36205
Marat Dukhan4c617792021-12-21 15:47:58 -080036206#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhandfc2db02021-08-08 21:19:07 -070036207 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD64, k_eq_8) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036208 GemmMicrokernelTester()
36209 .mr(1)
36210 .nr(4)
36211 .kr(8)
36212 .sr(1)
36213 .m(1)
36214 .n(4)
36215 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080036216 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036217 }
36218
Marat Dukhandfc2db02021-08-08 21:19:07 -070036219 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD64, strided_cn) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036220 GemmMicrokernelTester()
36221 .mr(1)
36222 .nr(4)
36223 .kr(8)
36224 .sr(1)
36225 .m(1)
36226 .n(4)
36227 .k(8)
36228 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080036229 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036230 }
36231
Marat Dukhandfc2db02021-08-08 21:19:07 -070036232 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD64, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080036233 for (uint32_t n = 1; n <= 4; n++) {
36234 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036235 GemmMicrokernelTester()
36236 .mr(1)
36237 .nr(4)
36238 .kr(8)
36239 .sr(1)
36240 .m(m)
36241 .n(n)
36242 .k(8)
36243 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036244 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036245 }
36246 }
36247 }
36248
Marat Dukhandfc2db02021-08-08 21:19:07 -070036249 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD64, k_eq_8_subtile_m) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036250 for (uint32_t m = 1; m <= 1; m++) {
36251 GemmMicrokernelTester()
36252 .mr(1)
36253 .nr(4)
36254 .kr(8)
36255 .sr(1)
36256 .m(m)
36257 .n(4)
36258 .k(8)
36259 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036260 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036261 }
36262 }
36263
Marat Dukhandfc2db02021-08-08 21:19:07 -070036264 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD64, k_eq_8_subtile_n) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036265 for (uint32_t n = 1; n <= 4; n++) {
36266 GemmMicrokernelTester()
36267 .mr(1)
36268 .nr(4)
36269 .kr(8)
36270 .sr(1)
36271 .m(1)
36272 .n(n)
36273 .k(8)
36274 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036275 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036276 }
36277 }
36278
Marat Dukhandfc2db02021-08-08 21:19:07 -070036279 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD64, k_lt_8) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036280 for (size_t k = 1; k < 8; k++) {
36281 GemmMicrokernelTester()
36282 .mr(1)
36283 .nr(4)
36284 .kr(8)
36285 .sr(1)
36286 .m(1)
36287 .n(4)
36288 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080036289 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036290 }
36291 }
36292
Marat Dukhandfc2db02021-08-08 21:19:07 -070036293 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD64, k_lt_8_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036294 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080036295 for (uint32_t n = 1; n <= 4; n++) {
36296 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036297 GemmMicrokernelTester()
36298 .mr(1)
36299 .nr(4)
36300 .kr(8)
36301 .sr(1)
36302 .m(m)
36303 .n(n)
36304 .k(k)
36305 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036306 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036307 }
36308 }
36309 }
36310 }
36311
Marat Dukhandfc2db02021-08-08 21:19:07 -070036312 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD64, k_gt_8) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036313 for (size_t k = 9; k < 16; k++) {
36314 GemmMicrokernelTester()
36315 .mr(1)
36316 .nr(4)
36317 .kr(8)
36318 .sr(1)
36319 .m(1)
36320 .n(4)
36321 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080036322 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036323 }
36324 }
36325
Marat Dukhandfc2db02021-08-08 21:19:07 -070036326 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD64, k_gt_8_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036327 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080036328 for (uint32_t n = 1; n <= 4; n++) {
36329 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036330 GemmMicrokernelTester()
36331 .mr(1)
36332 .nr(4)
36333 .kr(8)
36334 .sr(1)
36335 .m(m)
36336 .n(n)
36337 .k(k)
36338 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036339 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036340 }
36341 }
36342 }
36343 }
36344
Marat Dukhandfc2db02021-08-08 21:19:07 -070036345 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD64, k_div_8) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036346 for (size_t k = 16; k <= 80; k += 8) {
36347 GemmMicrokernelTester()
36348 .mr(1)
36349 .nr(4)
36350 .kr(8)
36351 .sr(1)
36352 .m(1)
36353 .n(4)
36354 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080036355 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036356 }
36357 }
36358
Marat Dukhandfc2db02021-08-08 21:19:07 -070036359 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD64, k_div_8_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036360 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080036361 for (uint32_t n = 1; n <= 4; n++) {
36362 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036363 GemmMicrokernelTester()
36364 .mr(1)
36365 .nr(4)
36366 .kr(8)
36367 .sr(1)
36368 .m(m)
36369 .n(n)
36370 .k(k)
36371 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036372 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036373 }
36374 }
36375 }
36376 }
36377
Marat Dukhandfc2db02021-08-08 21:19:07 -070036378 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD64, n_gt_4) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036379 for (uint32_t n = 5; n < 8; n++) {
36380 for (size_t k = 1; k <= 40; k += 9) {
36381 GemmMicrokernelTester()
36382 .mr(1)
36383 .nr(4)
36384 .kr(8)
36385 .sr(1)
36386 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080036387 .n(n)
Marat Dukhan4741e412021-06-30 13:38:06 -070036388 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080036389 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036390 }
36391 }
36392 }
36393
Marat Dukhandfc2db02021-08-08 21:19:07 -070036394 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD64, n_gt_4_strided_cn) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036395 for (uint32_t n = 5; n < 8; n++) {
36396 for (size_t k = 1; k <= 40; k += 9) {
36397 GemmMicrokernelTester()
36398 .mr(1)
36399 .nr(4)
36400 .kr(8)
36401 .sr(1)
36402 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080036403 .n(n)
Marat Dukhan4741e412021-06-30 13:38:06 -070036404 .k(k)
36405 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080036406 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036407 }
36408 }
36409 }
36410
Marat Dukhandfc2db02021-08-08 21:19:07 -070036411 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD64, n_gt_4_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036412 for (uint32_t n = 5; n < 8; n++) {
36413 for (size_t k = 1; k <= 40; k += 9) {
36414 for (uint32_t m = 1; m <= 1; m++) {
36415 GemmMicrokernelTester()
36416 .mr(1)
36417 .nr(4)
36418 .kr(8)
36419 .sr(1)
36420 .m(m)
36421 .n(n)
36422 .k(k)
36423 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036424 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036425 }
36426 }
36427 }
36428 }
36429
Marat Dukhandfc2db02021-08-08 21:19:07 -070036430 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD64, n_div_4) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036431 for (uint32_t n = 8; n <= 12; n += 4) {
36432 for (size_t k = 1; k <= 40; k += 9) {
36433 GemmMicrokernelTester()
36434 .mr(1)
36435 .nr(4)
36436 .kr(8)
36437 .sr(1)
36438 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080036439 .n(n)
Marat Dukhan4741e412021-06-30 13:38:06 -070036440 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080036441 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036442 }
36443 }
36444 }
36445
Marat Dukhandfc2db02021-08-08 21:19:07 -070036446 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD64, n_div_4_strided_cn) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036447 for (uint32_t n = 8; n <= 12; n += 4) {
36448 for (size_t k = 1; k <= 40; k += 9) {
36449 GemmMicrokernelTester()
36450 .mr(1)
36451 .nr(4)
36452 .kr(8)
36453 .sr(1)
36454 .m(1)
36455 .n(n)
36456 .k(k)
36457 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080036458 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036459 }
36460 }
36461 }
36462
Marat Dukhandfc2db02021-08-08 21:19:07 -070036463 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD64, n_div_4_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036464 for (uint32_t n = 8; n <= 12; n += 4) {
36465 for (size_t k = 1; k <= 40; k += 9) {
36466 for (uint32_t m = 1; m <= 1; m++) {
36467 GemmMicrokernelTester()
36468 .mr(1)
36469 .nr(4)
36470 .kr(8)
36471 .sr(1)
36472 .m(m)
36473 .n(n)
36474 .k(k)
36475 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036476 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036477 }
36478 }
36479 }
36480 }
36481
Marat Dukhandfc2db02021-08-08 21:19:07 -070036482 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD64, small_kernel) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036483 for (size_t k = 1; k <= 40; k += 9) {
36484 GemmMicrokernelTester()
36485 .mr(1)
36486 .nr(4)
36487 .kr(8)
36488 .sr(1)
36489 .m(1)
36490 .n(4)
36491 .k(k)
36492 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080036493 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036494 }
36495 }
36496
Marat Dukhandfc2db02021-08-08 21:19:07 -070036497 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD64, small_kernel_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036498 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080036499 for (uint32_t n = 1; n <= 4; n++) {
36500 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036501 GemmMicrokernelTester()
36502 .mr(1)
36503 .nr(4)
36504 .kr(8)
36505 .sr(1)
36506 .m(m)
36507 .n(n)
36508 .k(k)
36509 .ks(3)
36510 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036511 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036512 }
36513 }
36514 }
36515 }
36516
Marat Dukhandfc2db02021-08-08 21:19:07 -070036517 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD64, n_gt_4_small_kernel) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036518 for (uint32_t n = 5; n < 8; n++) {
36519 for (size_t k = 1; k <= 40; k += 9) {
36520 GemmMicrokernelTester()
36521 .mr(1)
36522 .nr(4)
36523 .kr(8)
36524 .sr(1)
36525 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080036526 .n(n)
Marat Dukhan4741e412021-06-30 13:38:06 -070036527 .k(k)
36528 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080036529 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036530 }
36531 }
36532 }
36533
Marat Dukhandfc2db02021-08-08 21:19:07 -070036534 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD64, n_div_4_small_kernel) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036535 for (uint32_t n = 8; n <= 12; n += 4) {
36536 for (size_t k = 1; k <= 40; k += 9) {
36537 GemmMicrokernelTester()
36538 .mr(1)
36539 .nr(4)
36540 .kr(8)
36541 .sr(1)
36542 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080036543 .n(n)
Marat Dukhan4741e412021-06-30 13:38:06 -070036544 .k(k)
36545 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080036546 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036547 }
36548 }
36549 }
36550
Marat Dukhandfc2db02021-08-08 21:19:07 -070036551 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD64, strided_cm_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036552 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080036553 for (uint32_t n = 1; n <= 4; n++) {
36554 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036555 GemmMicrokernelTester()
36556 .mr(1)
36557 .nr(4)
36558 .kr(8)
36559 .sr(1)
36560 .m(m)
36561 .n(n)
36562 .k(k)
36563 .cm_stride(7)
36564 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036565 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036566 }
36567 }
36568 }
36569 }
36570
Marat Dukhandfc2db02021-08-08 21:19:07 -070036571 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD64, a_offset) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036572 for (size_t k = 1; k <= 40; k += 9) {
36573 GemmMicrokernelTester()
36574 .mr(1)
36575 .nr(4)
36576 .kr(8)
36577 .sr(1)
36578 .m(1)
36579 .n(4)
36580 .k(k)
36581 .ks(3)
36582 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080036583 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036584 }
36585 }
36586
Marat Dukhandfc2db02021-08-08 21:19:07 -070036587 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD64, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080036588 for (size_t k = 1; k <= 40; k += 9) {
36589 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036590 GemmMicrokernelTester()
36591 .mr(1)
36592 .nr(4)
36593 .kr(8)
36594 .sr(1)
36595 .m(1)
36596 .n(4)
36597 .k(k)
36598 .ks(3)
36599 .a_offset(43)
36600 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080036601 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036602 }
36603 }
36604 }
36605
Marat Dukhandfc2db02021-08-08 21:19:07 -070036606 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD64, qmin) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036607 GemmMicrokernelTester()
36608 .mr(1)
36609 .nr(4)
36610 .kr(8)
36611 .sr(1)
36612 .m(1)
36613 .n(4)
36614 .k(8)
36615 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080036616 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036617 }
36618
Marat Dukhandfc2db02021-08-08 21:19:07 -070036619 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD64, qmax) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036620 GemmMicrokernelTester()
36621 .mr(1)
36622 .nr(4)
36623 .kr(8)
36624 .sr(1)
36625 .m(1)
36626 .n(4)
36627 .k(8)
36628 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080036629 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036630 }
36631
Marat Dukhandfc2db02021-08-08 21:19:07 -070036632 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD64, strided_cm) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036633 GemmMicrokernelTester()
36634 .mr(1)
36635 .nr(4)
36636 .kr(8)
36637 .sr(1)
36638 .m(1)
36639 .n(4)
36640 .k(8)
36641 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080036642 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036643 }
Marat Dukhan4c617792021-12-21 15:47:58 -080036644#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan4741e412021-06-30 13:38:06 -070036645
36646
Marat Dukhan4c617792021-12-21 15:47:58 -080036647#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhandfc2db02021-08-08 21:19:07 -070036648 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD64, k_eq_8) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036649 GemmMicrokernelTester()
36650 .mr(2)
36651 .nr(4)
36652 .kr(8)
36653 .sr(1)
36654 .m(2)
36655 .n(4)
36656 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080036657 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036658 }
36659
Marat Dukhandfc2db02021-08-08 21:19:07 -070036660 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD64, strided_cn) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036661 GemmMicrokernelTester()
36662 .mr(2)
36663 .nr(4)
36664 .kr(8)
36665 .sr(1)
36666 .m(2)
36667 .n(4)
36668 .k(8)
36669 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080036670 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036671 }
36672
Marat Dukhandfc2db02021-08-08 21:19:07 -070036673 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD64, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080036674 for (uint32_t n = 1; n <= 4; n++) {
36675 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036676 GemmMicrokernelTester()
36677 .mr(2)
36678 .nr(4)
36679 .kr(8)
36680 .sr(1)
36681 .m(m)
36682 .n(n)
36683 .k(8)
36684 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036685 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036686 }
36687 }
36688 }
36689
Marat Dukhandfc2db02021-08-08 21:19:07 -070036690 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD64, k_eq_8_subtile_m) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036691 for (uint32_t m = 1; m <= 2; m++) {
36692 GemmMicrokernelTester()
36693 .mr(2)
36694 .nr(4)
36695 .kr(8)
36696 .sr(1)
36697 .m(m)
36698 .n(4)
36699 .k(8)
36700 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036701 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036702 }
36703 }
36704
Marat Dukhandfc2db02021-08-08 21:19:07 -070036705 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD64, k_eq_8_subtile_n) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036706 for (uint32_t n = 1; n <= 4; n++) {
36707 GemmMicrokernelTester()
36708 .mr(2)
36709 .nr(4)
36710 .kr(8)
36711 .sr(1)
36712 .m(2)
36713 .n(n)
36714 .k(8)
36715 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036716 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036717 }
36718 }
36719
Marat Dukhandfc2db02021-08-08 21:19:07 -070036720 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD64, k_lt_8) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036721 for (size_t k = 1; k < 8; k++) {
36722 GemmMicrokernelTester()
36723 .mr(2)
36724 .nr(4)
36725 .kr(8)
36726 .sr(1)
36727 .m(2)
36728 .n(4)
36729 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080036730 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036731 }
36732 }
36733
Marat Dukhandfc2db02021-08-08 21:19:07 -070036734 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD64, k_lt_8_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036735 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080036736 for (uint32_t n = 1; n <= 4; n++) {
36737 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036738 GemmMicrokernelTester()
36739 .mr(2)
36740 .nr(4)
36741 .kr(8)
36742 .sr(1)
36743 .m(m)
36744 .n(n)
36745 .k(k)
36746 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036747 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036748 }
36749 }
36750 }
36751 }
36752
Marat Dukhandfc2db02021-08-08 21:19:07 -070036753 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD64, k_gt_8) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036754 for (size_t k = 9; k < 16; k++) {
36755 GemmMicrokernelTester()
36756 .mr(2)
36757 .nr(4)
36758 .kr(8)
36759 .sr(1)
36760 .m(2)
36761 .n(4)
36762 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080036763 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036764 }
36765 }
36766
Marat Dukhandfc2db02021-08-08 21:19:07 -070036767 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD64, k_gt_8_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036768 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080036769 for (uint32_t n = 1; n <= 4; n++) {
36770 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036771 GemmMicrokernelTester()
36772 .mr(2)
36773 .nr(4)
36774 .kr(8)
36775 .sr(1)
36776 .m(m)
36777 .n(n)
36778 .k(k)
36779 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036780 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036781 }
36782 }
36783 }
36784 }
36785
Marat Dukhandfc2db02021-08-08 21:19:07 -070036786 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD64, k_div_8) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036787 for (size_t k = 16; k <= 80; k += 8) {
36788 GemmMicrokernelTester()
36789 .mr(2)
36790 .nr(4)
36791 .kr(8)
36792 .sr(1)
36793 .m(2)
36794 .n(4)
36795 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080036796 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036797 }
36798 }
36799
Marat Dukhandfc2db02021-08-08 21:19:07 -070036800 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD64, k_div_8_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036801 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080036802 for (uint32_t n = 1; n <= 4; n++) {
36803 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036804 GemmMicrokernelTester()
36805 .mr(2)
36806 .nr(4)
36807 .kr(8)
36808 .sr(1)
36809 .m(m)
36810 .n(n)
36811 .k(k)
36812 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036813 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036814 }
36815 }
36816 }
36817 }
36818
Marat Dukhandfc2db02021-08-08 21:19:07 -070036819 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD64, n_gt_4) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036820 for (uint32_t n = 5; n < 8; n++) {
36821 for (size_t k = 1; k <= 40; k += 9) {
36822 GemmMicrokernelTester()
36823 .mr(2)
36824 .nr(4)
36825 .kr(8)
36826 .sr(1)
36827 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080036828 .n(n)
Marat Dukhan4741e412021-06-30 13:38:06 -070036829 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080036830 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036831 }
36832 }
36833 }
36834
Marat Dukhandfc2db02021-08-08 21:19:07 -070036835 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD64, n_gt_4_strided_cn) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036836 for (uint32_t n = 5; n < 8; n++) {
36837 for (size_t k = 1; k <= 40; k += 9) {
36838 GemmMicrokernelTester()
36839 .mr(2)
36840 .nr(4)
36841 .kr(8)
36842 .sr(1)
36843 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080036844 .n(n)
Marat Dukhan4741e412021-06-30 13:38:06 -070036845 .k(k)
36846 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080036847 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036848 }
36849 }
36850 }
36851
Marat Dukhandfc2db02021-08-08 21:19:07 -070036852 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD64, n_gt_4_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036853 for (uint32_t n = 5; n < 8; n++) {
36854 for (size_t k = 1; k <= 40; k += 9) {
36855 for (uint32_t m = 1; m <= 2; m++) {
36856 GemmMicrokernelTester()
36857 .mr(2)
36858 .nr(4)
36859 .kr(8)
36860 .sr(1)
36861 .m(m)
36862 .n(n)
36863 .k(k)
36864 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036865 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036866 }
36867 }
36868 }
36869 }
36870
Marat Dukhandfc2db02021-08-08 21:19:07 -070036871 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD64, n_div_4) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036872 for (uint32_t n = 8; n <= 12; n += 4) {
36873 for (size_t k = 1; k <= 40; k += 9) {
36874 GemmMicrokernelTester()
36875 .mr(2)
36876 .nr(4)
36877 .kr(8)
36878 .sr(1)
36879 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080036880 .n(n)
Marat Dukhan4741e412021-06-30 13:38:06 -070036881 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080036882 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036883 }
36884 }
36885 }
36886
Marat Dukhandfc2db02021-08-08 21:19:07 -070036887 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD64, n_div_4_strided_cn) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036888 for (uint32_t n = 8; n <= 12; n += 4) {
36889 for (size_t k = 1; k <= 40; k += 9) {
36890 GemmMicrokernelTester()
36891 .mr(2)
36892 .nr(4)
36893 .kr(8)
36894 .sr(1)
36895 .m(2)
36896 .n(n)
36897 .k(k)
36898 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080036899 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036900 }
36901 }
36902 }
36903
Marat Dukhandfc2db02021-08-08 21:19:07 -070036904 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD64, n_div_4_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036905 for (uint32_t n = 8; n <= 12; n += 4) {
36906 for (size_t k = 1; k <= 40; k += 9) {
36907 for (uint32_t m = 1; m <= 2; m++) {
36908 GemmMicrokernelTester()
36909 .mr(2)
36910 .nr(4)
36911 .kr(8)
36912 .sr(1)
36913 .m(m)
36914 .n(n)
36915 .k(k)
36916 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036917 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036918 }
36919 }
36920 }
36921 }
36922
Marat Dukhandfc2db02021-08-08 21:19:07 -070036923 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD64, small_kernel) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036924 for (size_t k = 1; k <= 40; k += 9) {
36925 GemmMicrokernelTester()
36926 .mr(2)
36927 .nr(4)
36928 .kr(8)
36929 .sr(1)
36930 .m(2)
36931 .n(4)
36932 .k(k)
36933 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080036934 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036935 }
36936 }
36937
Marat Dukhandfc2db02021-08-08 21:19:07 -070036938 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD64, small_kernel_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036939 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080036940 for (uint32_t n = 1; n <= 4; n++) {
36941 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036942 GemmMicrokernelTester()
36943 .mr(2)
36944 .nr(4)
36945 .kr(8)
36946 .sr(1)
36947 .m(m)
36948 .n(n)
36949 .k(k)
36950 .ks(3)
36951 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036952 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036953 }
36954 }
36955 }
36956 }
36957
Marat Dukhandfc2db02021-08-08 21:19:07 -070036958 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD64, n_gt_4_small_kernel) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036959 for (uint32_t n = 5; n < 8; n++) {
36960 for (size_t k = 1; k <= 40; k += 9) {
36961 GemmMicrokernelTester()
36962 .mr(2)
36963 .nr(4)
36964 .kr(8)
36965 .sr(1)
36966 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080036967 .n(n)
Marat Dukhan4741e412021-06-30 13:38:06 -070036968 .k(k)
36969 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080036970 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036971 }
36972 }
36973 }
36974
Marat Dukhandfc2db02021-08-08 21:19:07 -070036975 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD64, n_div_4_small_kernel) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036976 for (uint32_t n = 8; n <= 12; n += 4) {
36977 for (size_t k = 1; k <= 40; k += 9) {
36978 GemmMicrokernelTester()
36979 .mr(2)
36980 .nr(4)
36981 .kr(8)
36982 .sr(1)
36983 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080036984 .n(n)
Marat Dukhan4741e412021-06-30 13:38:06 -070036985 .k(k)
36986 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080036987 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070036988 }
36989 }
36990 }
36991
Marat Dukhandfc2db02021-08-08 21:19:07 -070036992 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD64, strided_cm_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036993 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080036994 for (uint32_t n = 1; n <= 4; n++) {
36995 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070036996 GemmMicrokernelTester()
36997 .mr(2)
36998 .nr(4)
36999 .kr(8)
37000 .sr(1)
37001 .m(m)
37002 .n(n)
37003 .k(k)
37004 .cm_stride(7)
37005 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037006 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037007 }
37008 }
37009 }
37010 }
37011
Marat Dukhandfc2db02021-08-08 21:19:07 -070037012 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD64, a_offset) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037013 for (size_t k = 1; k <= 40; k += 9) {
37014 GemmMicrokernelTester()
37015 .mr(2)
37016 .nr(4)
37017 .kr(8)
37018 .sr(1)
37019 .m(2)
37020 .n(4)
37021 .k(k)
37022 .ks(3)
37023 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080037024 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037025 }
37026 }
37027
Marat Dukhandfc2db02021-08-08 21:19:07 -070037028 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD64, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080037029 for (size_t k = 1; k <= 40; k += 9) {
37030 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037031 GemmMicrokernelTester()
37032 .mr(2)
37033 .nr(4)
37034 .kr(8)
37035 .sr(1)
37036 .m(2)
37037 .n(4)
37038 .k(k)
37039 .ks(3)
37040 .a_offset(83)
37041 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080037042 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037043 }
37044 }
37045 }
37046
Marat Dukhandfc2db02021-08-08 21:19:07 -070037047 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD64, qmin) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037048 GemmMicrokernelTester()
37049 .mr(2)
37050 .nr(4)
37051 .kr(8)
37052 .sr(1)
37053 .m(2)
37054 .n(4)
37055 .k(8)
37056 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080037057 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037058 }
37059
Marat Dukhandfc2db02021-08-08 21:19:07 -070037060 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD64, qmax) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037061 GemmMicrokernelTester()
37062 .mr(2)
37063 .nr(4)
37064 .kr(8)
37065 .sr(1)
37066 .m(2)
37067 .n(4)
37068 .k(8)
37069 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080037070 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037071 }
37072
Marat Dukhandfc2db02021-08-08 21:19:07 -070037073 TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD64, strided_cm) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037074 GemmMicrokernelTester()
37075 .mr(2)
37076 .nr(4)
37077 .kr(8)
37078 .sr(1)
37079 .m(2)
37080 .n(4)
37081 .k(8)
37082 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080037083 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037084 }
Marat Dukhan4c617792021-12-21 15:47:58 -080037085#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan4741e412021-06-30 13:38:06 -070037086
37087
Marat Dukhan4c617792021-12-21 15:47:58 -080037088#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhandfc2db02021-08-08 21:19:07 -070037089 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD128, k_eq_8) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037090 GemmMicrokernelTester()
37091 .mr(1)
37092 .nr(4)
37093 .kr(8)
37094 .sr(1)
37095 .m(1)
37096 .n(4)
37097 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080037098 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037099 }
37100
Marat Dukhandfc2db02021-08-08 21:19:07 -070037101 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD128, strided_cn) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037102 GemmMicrokernelTester()
37103 .mr(1)
37104 .nr(4)
37105 .kr(8)
37106 .sr(1)
37107 .m(1)
37108 .n(4)
37109 .k(8)
37110 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080037111 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037112 }
37113
Marat Dukhandfc2db02021-08-08 21:19:07 -070037114 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD128, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080037115 for (uint32_t n = 1; n <= 4; n++) {
37116 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037117 GemmMicrokernelTester()
37118 .mr(1)
37119 .nr(4)
37120 .kr(8)
37121 .sr(1)
37122 .m(m)
37123 .n(n)
37124 .k(8)
37125 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037126 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037127 }
37128 }
37129 }
37130
Marat Dukhandfc2db02021-08-08 21:19:07 -070037131 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD128, k_eq_8_subtile_m) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037132 for (uint32_t m = 1; m <= 1; m++) {
37133 GemmMicrokernelTester()
37134 .mr(1)
37135 .nr(4)
37136 .kr(8)
37137 .sr(1)
37138 .m(m)
37139 .n(4)
37140 .k(8)
37141 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037142 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037143 }
37144 }
37145
Marat Dukhandfc2db02021-08-08 21:19:07 -070037146 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD128, k_eq_8_subtile_n) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037147 for (uint32_t n = 1; n <= 4; n++) {
37148 GemmMicrokernelTester()
37149 .mr(1)
37150 .nr(4)
37151 .kr(8)
37152 .sr(1)
37153 .m(1)
37154 .n(n)
37155 .k(8)
37156 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037157 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037158 }
37159 }
37160
Marat Dukhandfc2db02021-08-08 21:19:07 -070037161 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD128, k_lt_8) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037162 for (size_t k = 1; k < 8; k++) {
37163 GemmMicrokernelTester()
37164 .mr(1)
37165 .nr(4)
37166 .kr(8)
37167 .sr(1)
37168 .m(1)
37169 .n(4)
37170 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080037171 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037172 }
37173 }
37174
Marat Dukhandfc2db02021-08-08 21:19:07 -070037175 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD128, k_lt_8_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037176 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080037177 for (uint32_t n = 1; n <= 4; n++) {
37178 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037179 GemmMicrokernelTester()
37180 .mr(1)
37181 .nr(4)
37182 .kr(8)
37183 .sr(1)
37184 .m(m)
37185 .n(n)
37186 .k(k)
37187 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037188 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037189 }
37190 }
37191 }
37192 }
37193
Marat Dukhandfc2db02021-08-08 21:19:07 -070037194 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD128, k_gt_8) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037195 for (size_t k = 9; k < 16; k++) {
37196 GemmMicrokernelTester()
37197 .mr(1)
37198 .nr(4)
37199 .kr(8)
37200 .sr(1)
37201 .m(1)
37202 .n(4)
37203 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080037204 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037205 }
37206 }
37207
Marat Dukhandfc2db02021-08-08 21:19:07 -070037208 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD128, k_gt_8_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037209 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080037210 for (uint32_t n = 1; n <= 4; n++) {
37211 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037212 GemmMicrokernelTester()
37213 .mr(1)
37214 .nr(4)
37215 .kr(8)
37216 .sr(1)
37217 .m(m)
37218 .n(n)
37219 .k(k)
37220 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037221 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037222 }
37223 }
37224 }
37225 }
37226
Marat Dukhandfc2db02021-08-08 21:19:07 -070037227 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD128, k_div_8) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037228 for (size_t k = 16; k <= 80; k += 8) {
37229 GemmMicrokernelTester()
37230 .mr(1)
37231 .nr(4)
37232 .kr(8)
37233 .sr(1)
37234 .m(1)
37235 .n(4)
37236 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080037237 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037238 }
37239 }
37240
Marat Dukhandfc2db02021-08-08 21:19:07 -070037241 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD128, k_div_8_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037242 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080037243 for (uint32_t n = 1; n <= 4; n++) {
37244 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037245 GemmMicrokernelTester()
37246 .mr(1)
37247 .nr(4)
37248 .kr(8)
37249 .sr(1)
37250 .m(m)
37251 .n(n)
37252 .k(k)
37253 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037254 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037255 }
37256 }
37257 }
37258 }
37259
Marat Dukhandfc2db02021-08-08 21:19:07 -070037260 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD128, n_gt_4) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037261 for (uint32_t n = 5; n < 8; n++) {
37262 for (size_t k = 1; k <= 40; k += 9) {
37263 GemmMicrokernelTester()
37264 .mr(1)
37265 .nr(4)
37266 .kr(8)
37267 .sr(1)
37268 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080037269 .n(n)
Marat Dukhan4741e412021-06-30 13:38:06 -070037270 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080037271 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037272 }
37273 }
37274 }
37275
Marat Dukhandfc2db02021-08-08 21:19:07 -070037276 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD128, n_gt_4_strided_cn) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037277 for (uint32_t n = 5; n < 8; n++) {
37278 for (size_t k = 1; k <= 40; k += 9) {
37279 GemmMicrokernelTester()
37280 .mr(1)
37281 .nr(4)
37282 .kr(8)
37283 .sr(1)
37284 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080037285 .n(n)
Marat Dukhan4741e412021-06-30 13:38:06 -070037286 .k(k)
37287 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080037288 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037289 }
37290 }
37291 }
37292
Marat Dukhandfc2db02021-08-08 21:19:07 -070037293 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD128, n_gt_4_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037294 for (uint32_t n = 5; n < 8; n++) {
37295 for (size_t k = 1; k <= 40; k += 9) {
37296 for (uint32_t m = 1; m <= 1; m++) {
37297 GemmMicrokernelTester()
37298 .mr(1)
37299 .nr(4)
37300 .kr(8)
37301 .sr(1)
37302 .m(m)
37303 .n(n)
37304 .k(k)
37305 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037306 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037307 }
37308 }
37309 }
37310 }
37311
Marat Dukhandfc2db02021-08-08 21:19:07 -070037312 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD128, n_div_4) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037313 for (uint32_t n = 8; n <= 12; n += 4) {
37314 for (size_t k = 1; k <= 40; k += 9) {
37315 GemmMicrokernelTester()
37316 .mr(1)
37317 .nr(4)
37318 .kr(8)
37319 .sr(1)
37320 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080037321 .n(n)
Marat Dukhan4741e412021-06-30 13:38:06 -070037322 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080037323 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037324 }
37325 }
37326 }
37327
Marat Dukhandfc2db02021-08-08 21:19:07 -070037328 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD128, n_div_4_strided_cn) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037329 for (uint32_t n = 8; n <= 12; n += 4) {
37330 for (size_t k = 1; k <= 40; k += 9) {
37331 GemmMicrokernelTester()
37332 .mr(1)
37333 .nr(4)
37334 .kr(8)
37335 .sr(1)
37336 .m(1)
37337 .n(n)
37338 .k(k)
37339 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080037340 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037341 }
37342 }
37343 }
37344
Marat Dukhandfc2db02021-08-08 21:19:07 -070037345 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD128, n_div_4_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037346 for (uint32_t n = 8; n <= 12; n += 4) {
37347 for (size_t k = 1; k <= 40; k += 9) {
37348 for (uint32_t m = 1; m <= 1; m++) {
37349 GemmMicrokernelTester()
37350 .mr(1)
37351 .nr(4)
37352 .kr(8)
37353 .sr(1)
37354 .m(m)
37355 .n(n)
37356 .k(k)
37357 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037358 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037359 }
37360 }
37361 }
37362 }
37363
Marat Dukhandfc2db02021-08-08 21:19:07 -070037364 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD128, small_kernel) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037365 for (size_t k = 1; k <= 40; k += 9) {
37366 GemmMicrokernelTester()
37367 .mr(1)
37368 .nr(4)
37369 .kr(8)
37370 .sr(1)
37371 .m(1)
37372 .n(4)
37373 .k(k)
37374 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080037375 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037376 }
37377 }
37378
Marat Dukhandfc2db02021-08-08 21:19:07 -070037379 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD128, small_kernel_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037380 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080037381 for (uint32_t n = 1; n <= 4; n++) {
37382 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037383 GemmMicrokernelTester()
37384 .mr(1)
37385 .nr(4)
37386 .kr(8)
37387 .sr(1)
37388 .m(m)
37389 .n(n)
37390 .k(k)
37391 .ks(3)
37392 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037393 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037394 }
37395 }
37396 }
37397 }
37398
Marat Dukhandfc2db02021-08-08 21:19:07 -070037399 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD128, n_gt_4_small_kernel) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037400 for (uint32_t n = 5; n < 8; n++) {
37401 for (size_t k = 1; k <= 40; k += 9) {
37402 GemmMicrokernelTester()
37403 .mr(1)
37404 .nr(4)
37405 .kr(8)
37406 .sr(1)
37407 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080037408 .n(n)
Marat Dukhan4741e412021-06-30 13:38:06 -070037409 .k(k)
37410 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080037411 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037412 }
37413 }
37414 }
37415
Marat Dukhandfc2db02021-08-08 21:19:07 -070037416 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD128, n_div_4_small_kernel) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037417 for (uint32_t n = 8; n <= 12; n += 4) {
37418 for (size_t k = 1; k <= 40; k += 9) {
37419 GemmMicrokernelTester()
37420 .mr(1)
37421 .nr(4)
37422 .kr(8)
37423 .sr(1)
37424 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080037425 .n(n)
Marat Dukhan4741e412021-06-30 13:38:06 -070037426 .k(k)
37427 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080037428 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037429 }
37430 }
37431 }
37432
Marat Dukhandfc2db02021-08-08 21:19:07 -070037433 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD128, strided_cm_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037434 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080037435 for (uint32_t n = 1; n <= 4; n++) {
37436 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037437 GemmMicrokernelTester()
37438 .mr(1)
37439 .nr(4)
37440 .kr(8)
37441 .sr(1)
37442 .m(m)
37443 .n(n)
37444 .k(k)
37445 .cm_stride(7)
37446 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037447 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037448 }
37449 }
37450 }
37451 }
37452
Marat Dukhandfc2db02021-08-08 21:19:07 -070037453 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD128, a_offset) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037454 for (size_t k = 1; k <= 40; k += 9) {
37455 GemmMicrokernelTester()
37456 .mr(1)
37457 .nr(4)
37458 .kr(8)
37459 .sr(1)
37460 .m(1)
37461 .n(4)
37462 .k(k)
37463 .ks(3)
37464 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080037465 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037466 }
37467 }
37468
Marat Dukhandfc2db02021-08-08 21:19:07 -070037469 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD128, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080037470 for (size_t k = 1; k <= 40; k += 9) {
37471 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037472 GemmMicrokernelTester()
37473 .mr(1)
37474 .nr(4)
37475 .kr(8)
37476 .sr(1)
37477 .m(1)
37478 .n(4)
37479 .k(k)
37480 .ks(3)
37481 .a_offset(43)
37482 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080037483 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037484 }
37485 }
37486 }
37487
Marat Dukhandfc2db02021-08-08 21:19:07 -070037488 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD128, qmin) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037489 GemmMicrokernelTester()
37490 .mr(1)
37491 .nr(4)
37492 .kr(8)
37493 .sr(1)
37494 .m(1)
37495 .n(4)
37496 .k(8)
37497 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080037498 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037499 }
37500
Marat Dukhandfc2db02021-08-08 21:19:07 -070037501 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD128, qmax) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037502 GemmMicrokernelTester()
37503 .mr(1)
37504 .nr(4)
37505 .kr(8)
37506 .sr(1)
37507 .m(1)
37508 .n(4)
37509 .k(8)
37510 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080037511 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037512 }
37513
Marat Dukhandfc2db02021-08-08 21:19:07 -070037514 TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL16_LD128, strided_cm) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037515 GemmMicrokernelTester()
37516 .mr(1)
37517 .nr(4)
37518 .kr(8)
37519 .sr(1)
37520 .m(1)
37521 .n(4)
37522 .k(8)
37523 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080037524 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037525 }
Marat Dukhan4c617792021-12-21 15:47:58 -080037526#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan4741e412021-06-30 13:38:06 -070037527
37528
Marat Dukhan4c617792021-12-21 15:47:58 -080037529#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhandfc2db02021-08-08 21:19:07 -070037530 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD128, k_eq_8) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037531 GemmMicrokernelTester()
37532 .mr(3)
37533 .nr(4)
37534 .kr(8)
37535 .sr(1)
37536 .m(3)
37537 .n(4)
37538 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080037539 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037540 }
37541
Marat Dukhandfc2db02021-08-08 21:19:07 -070037542 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD128, strided_cn) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037543 GemmMicrokernelTester()
37544 .mr(3)
37545 .nr(4)
37546 .kr(8)
37547 .sr(1)
37548 .m(3)
37549 .n(4)
37550 .k(8)
37551 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080037552 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037553 }
37554
Marat Dukhandfc2db02021-08-08 21:19:07 -070037555 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD128, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080037556 for (uint32_t n = 1; n <= 4; n++) {
37557 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037558 GemmMicrokernelTester()
37559 .mr(3)
37560 .nr(4)
37561 .kr(8)
37562 .sr(1)
37563 .m(m)
37564 .n(n)
37565 .k(8)
37566 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037567 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037568 }
37569 }
37570 }
37571
Marat Dukhandfc2db02021-08-08 21:19:07 -070037572 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD128, k_eq_8_subtile_m) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037573 for (uint32_t m = 1; m <= 3; m++) {
37574 GemmMicrokernelTester()
37575 .mr(3)
37576 .nr(4)
37577 .kr(8)
37578 .sr(1)
37579 .m(m)
37580 .n(4)
37581 .k(8)
37582 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037583 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037584 }
37585 }
37586
Marat Dukhandfc2db02021-08-08 21:19:07 -070037587 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD128, k_eq_8_subtile_n) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037588 for (uint32_t n = 1; n <= 4; n++) {
37589 GemmMicrokernelTester()
37590 .mr(3)
37591 .nr(4)
37592 .kr(8)
37593 .sr(1)
37594 .m(3)
37595 .n(n)
37596 .k(8)
37597 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037598 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037599 }
37600 }
37601
Marat Dukhandfc2db02021-08-08 21:19:07 -070037602 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD128, k_lt_8) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037603 for (size_t k = 1; k < 8; k++) {
37604 GemmMicrokernelTester()
37605 .mr(3)
37606 .nr(4)
37607 .kr(8)
37608 .sr(1)
37609 .m(3)
37610 .n(4)
37611 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080037612 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037613 }
37614 }
37615
Marat Dukhandfc2db02021-08-08 21:19:07 -070037616 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD128, k_lt_8_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037617 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080037618 for (uint32_t n = 1; n <= 4; n++) {
37619 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037620 GemmMicrokernelTester()
37621 .mr(3)
37622 .nr(4)
37623 .kr(8)
37624 .sr(1)
37625 .m(m)
37626 .n(n)
37627 .k(k)
37628 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037629 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037630 }
37631 }
37632 }
37633 }
37634
Marat Dukhandfc2db02021-08-08 21:19:07 -070037635 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD128, k_gt_8) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037636 for (size_t k = 9; k < 16; k++) {
37637 GemmMicrokernelTester()
37638 .mr(3)
37639 .nr(4)
37640 .kr(8)
37641 .sr(1)
37642 .m(3)
37643 .n(4)
37644 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080037645 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037646 }
37647 }
37648
Marat Dukhandfc2db02021-08-08 21:19:07 -070037649 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD128, k_gt_8_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037650 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080037651 for (uint32_t n = 1; n <= 4; n++) {
37652 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037653 GemmMicrokernelTester()
37654 .mr(3)
37655 .nr(4)
37656 .kr(8)
37657 .sr(1)
37658 .m(m)
37659 .n(n)
37660 .k(k)
37661 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037662 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037663 }
37664 }
37665 }
37666 }
37667
Marat Dukhandfc2db02021-08-08 21:19:07 -070037668 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD128, k_div_8) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037669 for (size_t k = 16; k <= 80; k += 8) {
37670 GemmMicrokernelTester()
37671 .mr(3)
37672 .nr(4)
37673 .kr(8)
37674 .sr(1)
37675 .m(3)
37676 .n(4)
37677 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080037678 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037679 }
37680 }
37681
Marat Dukhandfc2db02021-08-08 21:19:07 -070037682 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD128, k_div_8_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037683 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080037684 for (uint32_t n = 1; n <= 4; n++) {
37685 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037686 GemmMicrokernelTester()
37687 .mr(3)
37688 .nr(4)
37689 .kr(8)
37690 .sr(1)
37691 .m(m)
37692 .n(n)
37693 .k(k)
37694 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037695 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037696 }
37697 }
37698 }
37699 }
37700
Marat Dukhandfc2db02021-08-08 21:19:07 -070037701 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD128, n_gt_4) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037702 for (uint32_t n = 5; n < 8; n++) {
37703 for (size_t k = 1; k <= 40; k += 9) {
37704 GemmMicrokernelTester()
37705 .mr(3)
37706 .nr(4)
37707 .kr(8)
37708 .sr(1)
37709 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080037710 .n(n)
Marat Dukhan4741e412021-06-30 13:38:06 -070037711 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080037712 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037713 }
37714 }
37715 }
37716
Marat Dukhandfc2db02021-08-08 21:19:07 -070037717 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD128, n_gt_4_strided_cn) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037718 for (uint32_t n = 5; n < 8; n++) {
37719 for (size_t k = 1; k <= 40; k += 9) {
37720 GemmMicrokernelTester()
37721 .mr(3)
37722 .nr(4)
37723 .kr(8)
37724 .sr(1)
37725 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080037726 .n(n)
Marat Dukhan4741e412021-06-30 13:38:06 -070037727 .k(k)
37728 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080037729 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037730 }
37731 }
37732 }
37733
Marat Dukhandfc2db02021-08-08 21:19:07 -070037734 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD128, n_gt_4_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037735 for (uint32_t n = 5; n < 8; n++) {
37736 for (size_t k = 1; k <= 40; k += 9) {
37737 for (uint32_t m = 1; m <= 3; m++) {
37738 GemmMicrokernelTester()
37739 .mr(3)
37740 .nr(4)
37741 .kr(8)
37742 .sr(1)
37743 .m(m)
37744 .n(n)
37745 .k(k)
37746 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037747 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037748 }
37749 }
37750 }
37751 }
37752
Marat Dukhandfc2db02021-08-08 21:19:07 -070037753 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD128, n_div_4) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037754 for (uint32_t n = 8; n <= 12; n += 4) {
37755 for (size_t k = 1; k <= 40; k += 9) {
37756 GemmMicrokernelTester()
37757 .mr(3)
37758 .nr(4)
37759 .kr(8)
37760 .sr(1)
37761 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080037762 .n(n)
Marat Dukhan4741e412021-06-30 13:38:06 -070037763 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080037764 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037765 }
37766 }
37767 }
37768
Marat Dukhandfc2db02021-08-08 21:19:07 -070037769 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD128, n_div_4_strided_cn) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037770 for (uint32_t n = 8; n <= 12; n += 4) {
37771 for (size_t k = 1; k <= 40; k += 9) {
37772 GemmMicrokernelTester()
37773 .mr(3)
37774 .nr(4)
37775 .kr(8)
37776 .sr(1)
37777 .m(3)
37778 .n(n)
37779 .k(k)
37780 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080037781 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037782 }
37783 }
37784 }
37785
Marat Dukhandfc2db02021-08-08 21:19:07 -070037786 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD128, n_div_4_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037787 for (uint32_t n = 8; n <= 12; n += 4) {
37788 for (size_t k = 1; k <= 40; k += 9) {
37789 for (uint32_t m = 1; m <= 3; m++) {
37790 GemmMicrokernelTester()
37791 .mr(3)
37792 .nr(4)
37793 .kr(8)
37794 .sr(1)
37795 .m(m)
37796 .n(n)
37797 .k(k)
37798 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037799 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037800 }
37801 }
37802 }
37803 }
37804
Marat Dukhandfc2db02021-08-08 21:19:07 -070037805 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD128, small_kernel) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037806 for (size_t k = 1; k <= 40; k += 9) {
37807 GemmMicrokernelTester()
37808 .mr(3)
37809 .nr(4)
37810 .kr(8)
37811 .sr(1)
37812 .m(3)
37813 .n(4)
37814 .k(k)
37815 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080037816 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037817 }
37818 }
37819
Marat Dukhandfc2db02021-08-08 21:19:07 -070037820 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD128, small_kernel_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037821 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080037822 for (uint32_t n = 1; n <= 4; n++) {
37823 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037824 GemmMicrokernelTester()
37825 .mr(3)
37826 .nr(4)
37827 .kr(8)
37828 .sr(1)
37829 .m(m)
37830 .n(n)
37831 .k(k)
37832 .ks(3)
37833 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037834 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037835 }
37836 }
37837 }
37838 }
37839
Marat Dukhandfc2db02021-08-08 21:19:07 -070037840 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD128, n_gt_4_small_kernel) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037841 for (uint32_t n = 5; n < 8; n++) {
37842 for (size_t k = 1; k <= 40; k += 9) {
37843 GemmMicrokernelTester()
37844 .mr(3)
37845 .nr(4)
37846 .kr(8)
37847 .sr(1)
37848 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080037849 .n(n)
Marat Dukhan4741e412021-06-30 13:38:06 -070037850 .k(k)
37851 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080037852 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037853 }
37854 }
37855 }
37856
Marat Dukhandfc2db02021-08-08 21:19:07 -070037857 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD128, n_div_4_small_kernel) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037858 for (uint32_t n = 8; n <= 12; n += 4) {
37859 for (size_t k = 1; k <= 40; k += 9) {
37860 GemmMicrokernelTester()
37861 .mr(3)
37862 .nr(4)
37863 .kr(8)
37864 .sr(1)
37865 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080037866 .n(n)
Marat Dukhan4741e412021-06-30 13:38:06 -070037867 .k(k)
37868 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080037869 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037870 }
37871 }
37872 }
37873
Marat Dukhandfc2db02021-08-08 21:19:07 -070037874 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD128, strided_cm_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037875 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080037876 for (uint32_t n = 1; n <= 4; n++) {
37877 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037878 GemmMicrokernelTester()
37879 .mr(3)
37880 .nr(4)
37881 .kr(8)
37882 .sr(1)
37883 .m(m)
37884 .n(n)
37885 .k(k)
37886 .cm_stride(7)
37887 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037888 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037889 }
37890 }
37891 }
37892 }
37893
Marat Dukhandfc2db02021-08-08 21:19:07 -070037894 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD128, a_offset) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037895 for (size_t k = 1; k <= 40; k += 9) {
37896 GemmMicrokernelTester()
37897 .mr(3)
37898 .nr(4)
37899 .kr(8)
37900 .sr(1)
37901 .m(3)
37902 .n(4)
37903 .k(k)
37904 .ks(3)
37905 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080037906 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037907 }
37908 }
37909
Marat Dukhandfc2db02021-08-08 21:19:07 -070037910 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD128, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080037911 for (size_t k = 1; k <= 40; k += 9) {
37912 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037913 GemmMicrokernelTester()
37914 .mr(3)
37915 .nr(4)
37916 .kr(8)
37917 .sr(1)
37918 .m(3)
37919 .n(4)
37920 .k(k)
37921 .ks(3)
37922 .a_offset(127)
37923 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080037924 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037925 }
37926 }
37927 }
37928
Marat Dukhandfc2db02021-08-08 21:19:07 -070037929 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD128, qmin) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037930 GemmMicrokernelTester()
37931 .mr(3)
37932 .nr(4)
37933 .kr(8)
37934 .sr(1)
37935 .m(3)
37936 .n(4)
37937 .k(8)
37938 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080037939 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037940 }
37941
Marat Dukhandfc2db02021-08-08 21:19:07 -070037942 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD128, qmax) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037943 GemmMicrokernelTester()
37944 .mr(3)
37945 .nr(4)
37946 .kr(8)
37947 .sr(1)
37948 .m(3)
37949 .n(4)
37950 .k(8)
37951 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080037952 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037953 }
37954
Marat Dukhandfc2db02021-08-08 21:19:07 -070037955 TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD128, strided_cm) {
Marat Dukhan4741e412021-06-30 13:38:06 -070037956 GemmMicrokernelTester()
37957 .mr(3)
37958 .nr(4)
37959 .kr(8)
37960 .sr(1)
37961 .m(3)
37962 .n(4)
37963 .k(8)
37964 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080037965 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070037966 }
Marat Dukhan4c617792021-12-21 15:47:58 -080037967#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan4741e412021-06-30 13:38:06 -070037968
37969
Marat Dukhan7c1115f2022-01-04 17:18:41 -080037970#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
37971 TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1) {
37972 GemmMicrokernelTester()
37973 .mr(1)
37974 .nr(2)
37975 .kr(1)
37976 .sr(1)
37977 .m(1)
37978 .n(2)
37979 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037980 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080037981 }
37982
37983 TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cn) {
37984 GemmMicrokernelTester()
37985 .mr(1)
37986 .nr(2)
37987 .kr(1)
37988 .sr(1)
37989 .m(1)
37990 .n(2)
37991 .k(1)
37992 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080037993 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080037994 }
37995
37996 TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080037997 for (uint32_t n = 1; n <= 2; n++) {
37998 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080037999 GemmMicrokernelTester()
38000 .mr(1)
38001 .nr(2)
38002 .kr(1)
38003 .sr(1)
38004 .m(m)
38005 .n(n)
38006 .k(1)
38007 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038008 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038009 }
38010 }
38011 }
38012
38013 TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile_m) {
38014 for (uint32_t m = 1; m <= 1; m++) {
38015 GemmMicrokernelTester()
38016 .mr(1)
38017 .nr(2)
38018 .kr(1)
38019 .sr(1)
38020 .m(m)
38021 .n(2)
38022 .k(1)
38023 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038024 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038025 }
38026 }
38027
38028 TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile_n) {
38029 for (uint32_t n = 1; n <= 2; n++) {
38030 GemmMicrokernelTester()
38031 .mr(1)
38032 .nr(2)
38033 .kr(1)
38034 .sr(1)
38035 .m(1)
38036 .n(n)
38037 .k(1)
38038 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038039 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038040 }
38041 }
38042
38043 TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_gt_1) {
38044 for (size_t k = 2; k < 10; k++) {
38045 GemmMicrokernelTester()
38046 .mr(1)
38047 .nr(2)
38048 .kr(1)
38049 .sr(1)
38050 .m(1)
38051 .n(2)
38052 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080038053 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038054 }
38055 }
38056
38057 TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_gt_1_subtile) {
38058 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080038059 for (uint32_t n = 1; n <= 2; n++) {
38060 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038061 GemmMicrokernelTester()
38062 .mr(1)
38063 .nr(2)
38064 .kr(1)
38065 .sr(1)
38066 .m(m)
38067 .n(n)
38068 .k(k)
38069 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038070 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038071 }
38072 }
38073 }
38074 }
38075
38076 TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2) {
38077 for (uint32_t n = 3; n < 4; n++) {
38078 for (size_t k = 1; k <= 5; k += 2) {
38079 GemmMicrokernelTester()
38080 .mr(1)
38081 .nr(2)
38082 .kr(1)
38083 .sr(1)
38084 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080038085 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038086 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080038087 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038088 }
38089 }
38090 }
38091
38092 TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_strided_cn) {
38093 for (uint32_t n = 3; n < 4; n++) {
38094 for (size_t k = 1; k <= 5; k += 2) {
38095 GemmMicrokernelTester()
38096 .mr(1)
38097 .nr(2)
38098 .kr(1)
38099 .sr(1)
38100 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080038101 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038102 .k(k)
38103 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080038104 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038105 }
38106 }
38107 }
38108
38109 TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_subtile) {
38110 for (uint32_t n = 3; n < 4; n++) {
38111 for (size_t k = 1; k <= 5; k += 2) {
38112 for (uint32_t m = 1; m <= 1; m++) {
38113 GemmMicrokernelTester()
38114 .mr(1)
38115 .nr(2)
38116 .kr(1)
38117 .sr(1)
38118 .m(m)
38119 .n(n)
38120 .k(k)
38121 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038122 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038123 }
38124 }
38125 }
38126 }
38127
38128 TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2) {
38129 for (uint32_t n = 4; n <= 6; n += 2) {
38130 for (size_t k = 1; k <= 5; k += 2) {
38131 GemmMicrokernelTester()
38132 .mr(1)
38133 .nr(2)
38134 .kr(1)
38135 .sr(1)
38136 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080038137 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038138 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080038139 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038140 }
38141 }
38142 }
38143
38144 TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_strided_cn) {
38145 for (uint32_t n = 4; n <= 6; n += 2) {
38146 for (size_t k = 1; k <= 5; k += 2) {
38147 GemmMicrokernelTester()
38148 .mr(1)
38149 .nr(2)
38150 .kr(1)
38151 .sr(1)
38152 .m(1)
38153 .n(n)
38154 .k(k)
38155 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080038156 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038157 }
38158 }
38159 }
38160
38161 TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_subtile) {
38162 for (uint32_t n = 4; n <= 6; n += 2) {
38163 for (size_t k = 1; k <= 5; k += 2) {
38164 for (uint32_t m = 1; m <= 1; m++) {
38165 GemmMicrokernelTester()
38166 .mr(1)
38167 .nr(2)
38168 .kr(1)
38169 .sr(1)
38170 .m(m)
38171 .n(n)
38172 .k(k)
38173 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038174 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038175 }
38176 }
38177 }
38178 }
38179
38180 TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, small_kernel) {
38181 for (size_t k = 1; k <= 5; k += 2) {
38182 GemmMicrokernelTester()
38183 .mr(1)
38184 .nr(2)
38185 .kr(1)
38186 .sr(1)
38187 .m(1)
38188 .n(2)
38189 .k(k)
38190 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080038191 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038192 }
38193 }
38194
38195 TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, small_kernel_subtile) {
38196 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080038197 for (uint32_t n = 1; n <= 2; n++) {
38198 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038199 GemmMicrokernelTester()
38200 .mr(1)
38201 .nr(2)
38202 .kr(1)
38203 .sr(1)
38204 .m(m)
38205 .n(n)
38206 .k(k)
38207 .ks(3)
38208 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038209 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038210 }
38211 }
38212 }
38213 }
38214
38215 TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_small_kernel) {
38216 for (uint32_t n = 3; n < 4; n++) {
38217 for (size_t k = 1; k <= 5; k += 2) {
38218 GemmMicrokernelTester()
38219 .mr(1)
38220 .nr(2)
38221 .kr(1)
38222 .sr(1)
38223 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080038224 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038225 .k(k)
38226 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080038227 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038228 }
38229 }
38230 }
38231
38232 TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_small_kernel) {
38233 for (uint32_t n = 4; n <= 6; n += 2) {
38234 for (size_t k = 1; k <= 5; k += 2) {
38235 GemmMicrokernelTester()
38236 .mr(1)
38237 .nr(2)
38238 .kr(1)
38239 .sr(1)
38240 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080038241 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038242 .k(k)
38243 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080038244 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038245 }
38246 }
38247 }
38248
38249 TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cm_subtile) {
38250 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080038251 for (uint32_t n = 1; n <= 2; n++) {
38252 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038253 GemmMicrokernelTester()
38254 .mr(1)
38255 .nr(2)
38256 .kr(1)
38257 .sr(1)
38258 .m(m)
38259 .n(n)
38260 .k(k)
38261 .cm_stride(5)
38262 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038263 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038264 }
38265 }
38266 }
38267 }
38268
38269 TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, a_offset) {
38270 for (size_t k = 1; k <= 5; k += 2) {
38271 GemmMicrokernelTester()
38272 .mr(1)
38273 .nr(2)
38274 .kr(1)
38275 .sr(1)
38276 .m(1)
38277 .n(2)
38278 .k(k)
38279 .ks(3)
38280 .a_offset(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080038281 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038282 }
38283 }
38284
38285 TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080038286 for (size_t k = 1; k <= 5; k += 2) {
38287 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038288 GemmMicrokernelTester()
38289 .mr(1)
38290 .nr(2)
38291 .kr(1)
38292 .sr(1)
38293 .m(1)
38294 .n(2)
38295 .k(k)
38296 .ks(3)
38297 .a_offset(7)
38298 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080038299 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038300 }
38301 }
38302 }
38303
38304 TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, qmin) {
38305 GemmMicrokernelTester()
38306 .mr(1)
38307 .nr(2)
38308 .kr(1)
38309 .sr(1)
38310 .m(1)
38311 .n(2)
38312 .k(1)
38313 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080038314 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038315 }
38316
38317 TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, qmax) {
38318 GemmMicrokernelTester()
38319 .mr(1)
38320 .nr(2)
38321 .kr(1)
38322 .sr(1)
38323 .m(1)
38324 .n(2)
38325 .k(1)
38326 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080038327 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038328 }
38329
38330 TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cm) {
38331 GemmMicrokernelTester()
38332 .mr(1)
38333 .nr(2)
38334 .kr(1)
38335 .sr(1)
38336 .m(1)
38337 .n(2)
38338 .k(1)
38339 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080038340 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038341 }
38342#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
38343
38344
38345#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
38346 TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1) {
38347 GemmMicrokernelTester()
38348 .mr(2)
38349 .nr(2)
38350 .kr(1)
38351 .sr(1)
38352 .m(2)
38353 .n(2)
38354 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038355 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038356 }
38357
38358 TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cn) {
38359 GemmMicrokernelTester()
38360 .mr(2)
38361 .nr(2)
38362 .kr(1)
38363 .sr(1)
38364 .m(2)
38365 .n(2)
38366 .k(1)
38367 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080038368 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038369 }
38370
38371 TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080038372 for (uint32_t n = 1; n <= 2; n++) {
38373 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038374 GemmMicrokernelTester()
38375 .mr(2)
38376 .nr(2)
38377 .kr(1)
38378 .sr(1)
38379 .m(m)
38380 .n(n)
38381 .k(1)
38382 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038383 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038384 }
38385 }
38386 }
38387
38388 TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile_m) {
38389 for (uint32_t m = 1; m <= 2; m++) {
38390 GemmMicrokernelTester()
38391 .mr(2)
38392 .nr(2)
38393 .kr(1)
38394 .sr(1)
38395 .m(m)
38396 .n(2)
38397 .k(1)
38398 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038399 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038400 }
38401 }
38402
38403 TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile_n) {
38404 for (uint32_t n = 1; n <= 2; n++) {
38405 GemmMicrokernelTester()
38406 .mr(2)
38407 .nr(2)
38408 .kr(1)
38409 .sr(1)
38410 .m(2)
38411 .n(n)
38412 .k(1)
38413 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038414 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038415 }
38416 }
38417
38418 TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_gt_1) {
38419 for (size_t k = 2; k < 10; k++) {
38420 GemmMicrokernelTester()
38421 .mr(2)
38422 .nr(2)
38423 .kr(1)
38424 .sr(1)
38425 .m(2)
38426 .n(2)
38427 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080038428 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038429 }
38430 }
38431
38432 TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_gt_1_subtile) {
38433 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080038434 for (uint32_t n = 1; n <= 2; n++) {
38435 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038436 GemmMicrokernelTester()
38437 .mr(2)
38438 .nr(2)
38439 .kr(1)
38440 .sr(1)
38441 .m(m)
38442 .n(n)
38443 .k(k)
38444 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038445 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038446 }
38447 }
38448 }
38449 }
38450
38451 TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2) {
38452 for (uint32_t n = 3; n < 4; n++) {
38453 for (size_t k = 1; k <= 5; k += 2) {
38454 GemmMicrokernelTester()
38455 .mr(2)
38456 .nr(2)
38457 .kr(1)
38458 .sr(1)
38459 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080038460 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038461 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080038462 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038463 }
38464 }
38465 }
38466
38467 TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_strided_cn) {
38468 for (uint32_t n = 3; n < 4; n++) {
38469 for (size_t k = 1; k <= 5; k += 2) {
38470 GemmMicrokernelTester()
38471 .mr(2)
38472 .nr(2)
38473 .kr(1)
38474 .sr(1)
38475 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080038476 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038477 .k(k)
38478 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080038479 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038480 }
38481 }
38482 }
38483
38484 TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_subtile) {
38485 for (uint32_t n = 3; n < 4; n++) {
38486 for (size_t k = 1; k <= 5; k += 2) {
38487 for (uint32_t m = 1; m <= 2; m++) {
38488 GemmMicrokernelTester()
38489 .mr(2)
38490 .nr(2)
38491 .kr(1)
38492 .sr(1)
38493 .m(m)
38494 .n(n)
38495 .k(k)
38496 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038497 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038498 }
38499 }
38500 }
38501 }
38502
38503 TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2) {
38504 for (uint32_t n = 4; n <= 6; n += 2) {
38505 for (size_t k = 1; k <= 5; k += 2) {
38506 GemmMicrokernelTester()
38507 .mr(2)
38508 .nr(2)
38509 .kr(1)
38510 .sr(1)
38511 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080038512 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038513 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080038514 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038515 }
38516 }
38517 }
38518
38519 TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_strided_cn) {
38520 for (uint32_t n = 4; n <= 6; n += 2) {
38521 for (size_t k = 1; k <= 5; k += 2) {
38522 GemmMicrokernelTester()
38523 .mr(2)
38524 .nr(2)
38525 .kr(1)
38526 .sr(1)
38527 .m(2)
38528 .n(n)
38529 .k(k)
38530 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080038531 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038532 }
38533 }
38534 }
38535
38536 TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_subtile) {
38537 for (uint32_t n = 4; n <= 6; n += 2) {
38538 for (size_t k = 1; k <= 5; k += 2) {
38539 for (uint32_t m = 1; m <= 2; m++) {
38540 GemmMicrokernelTester()
38541 .mr(2)
38542 .nr(2)
38543 .kr(1)
38544 .sr(1)
38545 .m(m)
38546 .n(n)
38547 .k(k)
38548 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038549 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038550 }
38551 }
38552 }
38553 }
38554
38555 TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, small_kernel) {
38556 for (size_t k = 1; k <= 5; k += 2) {
38557 GemmMicrokernelTester()
38558 .mr(2)
38559 .nr(2)
38560 .kr(1)
38561 .sr(1)
38562 .m(2)
38563 .n(2)
38564 .k(k)
38565 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080038566 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038567 }
38568 }
38569
38570 TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, small_kernel_subtile) {
38571 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080038572 for (uint32_t n = 1; n <= 2; n++) {
38573 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038574 GemmMicrokernelTester()
38575 .mr(2)
38576 .nr(2)
38577 .kr(1)
38578 .sr(1)
38579 .m(m)
38580 .n(n)
38581 .k(k)
38582 .ks(3)
38583 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038584 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038585 }
38586 }
38587 }
38588 }
38589
38590 TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_small_kernel) {
38591 for (uint32_t n = 3; n < 4; n++) {
38592 for (size_t k = 1; k <= 5; k += 2) {
38593 GemmMicrokernelTester()
38594 .mr(2)
38595 .nr(2)
38596 .kr(1)
38597 .sr(1)
38598 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080038599 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038600 .k(k)
38601 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080038602 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038603 }
38604 }
38605 }
38606
38607 TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_small_kernel) {
38608 for (uint32_t n = 4; n <= 6; n += 2) {
38609 for (size_t k = 1; k <= 5; k += 2) {
38610 GemmMicrokernelTester()
38611 .mr(2)
38612 .nr(2)
38613 .kr(1)
38614 .sr(1)
38615 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080038616 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038617 .k(k)
38618 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080038619 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038620 }
38621 }
38622 }
38623
38624 TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cm_subtile) {
38625 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080038626 for (uint32_t n = 1; n <= 2; n++) {
38627 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038628 GemmMicrokernelTester()
38629 .mr(2)
38630 .nr(2)
38631 .kr(1)
38632 .sr(1)
38633 .m(m)
38634 .n(n)
38635 .k(k)
38636 .cm_stride(5)
38637 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038638 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038639 }
38640 }
38641 }
38642 }
38643
38644 TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, a_offset) {
38645 for (size_t k = 1; k <= 5; k += 2) {
38646 GemmMicrokernelTester()
38647 .mr(2)
38648 .nr(2)
38649 .kr(1)
38650 .sr(1)
38651 .m(2)
38652 .n(2)
38653 .k(k)
38654 .ks(3)
38655 .a_offset(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080038656 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038657 }
38658 }
38659
38660 TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080038661 for (size_t k = 1; k <= 5; k += 2) {
38662 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038663 GemmMicrokernelTester()
38664 .mr(2)
38665 .nr(2)
38666 .kr(1)
38667 .sr(1)
38668 .m(2)
38669 .n(2)
38670 .k(k)
38671 .ks(3)
38672 .a_offset(13)
38673 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080038674 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038675 }
38676 }
38677 }
38678
38679 TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, qmin) {
38680 GemmMicrokernelTester()
38681 .mr(2)
38682 .nr(2)
38683 .kr(1)
38684 .sr(1)
38685 .m(2)
38686 .n(2)
38687 .k(1)
38688 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080038689 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038690 }
38691
38692 TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, qmax) {
38693 GemmMicrokernelTester()
38694 .mr(2)
38695 .nr(2)
38696 .kr(1)
38697 .sr(1)
38698 .m(2)
38699 .n(2)
38700 .k(1)
38701 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080038702 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038703 }
38704
38705 TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cm) {
38706 GemmMicrokernelTester()
38707 .mr(2)
38708 .nr(2)
38709 .kr(1)
38710 .sr(1)
38711 .m(2)
38712 .n(2)
38713 .k(1)
38714 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080038715 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038716 }
38717#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
38718
38719
38720#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038721 TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1) {
38722 GemmMicrokernelTester()
38723 .mr(4)
38724 .nr(2)
38725 .kr(1)
38726 .sr(1)
38727 .m(4)
38728 .n(2)
38729 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038730 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038731 }
38732
38733 TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cn) {
38734 GemmMicrokernelTester()
38735 .mr(4)
38736 .nr(2)
38737 .kr(1)
38738 .sr(1)
38739 .m(4)
38740 .n(2)
38741 .k(1)
38742 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080038743 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038744 }
38745
38746 TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080038747 for (uint32_t n = 1; n <= 2; n++) {
38748 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038749 GemmMicrokernelTester()
38750 .mr(4)
38751 .nr(2)
38752 .kr(1)
38753 .sr(1)
38754 .m(m)
38755 .n(n)
38756 .k(1)
38757 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038758 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038759 }
38760 }
38761 }
38762
38763 TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile_m) {
38764 for (uint32_t m = 1; m <= 4; m++) {
38765 GemmMicrokernelTester()
38766 .mr(4)
38767 .nr(2)
38768 .kr(1)
38769 .sr(1)
38770 .m(m)
38771 .n(2)
38772 .k(1)
38773 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038774 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038775 }
38776 }
38777
38778 TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile_n) {
38779 for (uint32_t n = 1; n <= 2; n++) {
38780 GemmMicrokernelTester()
38781 .mr(4)
38782 .nr(2)
38783 .kr(1)
38784 .sr(1)
38785 .m(4)
38786 .n(n)
38787 .k(1)
38788 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038789 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038790 }
38791 }
38792
38793 TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_gt_1) {
38794 for (size_t k = 2; k < 10; k++) {
38795 GemmMicrokernelTester()
38796 .mr(4)
38797 .nr(2)
38798 .kr(1)
38799 .sr(1)
38800 .m(4)
38801 .n(2)
38802 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080038803 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038804 }
38805 }
38806
38807 TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_gt_1_subtile) {
38808 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080038809 for (uint32_t n = 1; n <= 2; n++) {
38810 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038811 GemmMicrokernelTester()
38812 .mr(4)
38813 .nr(2)
38814 .kr(1)
38815 .sr(1)
38816 .m(m)
38817 .n(n)
38818 .k(k)
38819 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038820 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038821 }
38822 }
38823 }
38824 }
38825
38826 TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2) {
38827 for (uint32_t n = 3; n < 4; n++) {
38828 for (size_t k = 1; k <= 5; k += 2) {
38829 GemmMicrokernelTester()
38830 .mr(4)
38831 .nr(2)
38832 .kr(1)
38833 .sr(1)
38834 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080038835 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038836 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080038837 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038838 }
38839 }
38840 }
38841
38842 TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_strided_cn) {
38843 for (uint32_t n = 3; n < 4; n++) {
38844 for (size_t k = 1; k <= 5; k += 2) {
38845 GemmMicrokernelTester()
38846 .mr(4)
38847 .nr(2)
38848 .kr(1)
38849 .sr(1)
38850 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080038851 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038852 .k(k)
38853 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080038854 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038855 }
38856 }
38857 }
38858
38859 TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_subtile) {
38860 for (uint32_t n = 3; n < 4; n++) {
38861 for (size_t k = 1; k <= 5; k += 2) {
38862 for (uint32_t m = 1; m <= 4; m++) {
38863 GemmMicrokernelTester()
38864 .mr(4)
38865 .nr(2)
38866 .kr(1)
38867 .sr(1)
38868 .m(m)
38869 .n(n)
38870 .k(k)
38871 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038872 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038873 }
38874 }
38875 }
38876 }
38877
38878 TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2) {
38879 for (uint32_t n = 4; n <= 6; n += 2) {
38880 for (size_t k = 1; k <= 5; k += 2) {
38881 GemmMicrokernelTester()
38882 .mr(4)
38883 .nr(2)
38884 .kr(1)
38885 .sr(1)
38886 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080038887 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038888 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080038889 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038890 }
38891 }
38892 }
38893
38894 TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_strided_cn) {
38895 for (uint32_t n = 4; n <= 6; n += 2) {
38896 for (size_t k = 1; k <= 5; k += 2) {
38897 GemmMicrokernelTester()
38898 .mr(4)
38899 .nr(2)
38900 .kr(1)
38901 .sr(1)
38902 .m(4)
38903 .n(n)
38904 .k(k)
38905 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080038906 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038907 }
38908 }
38909 }
38910
38911 TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_subtile) {
38912 for (uint32_t n = 4; n <= 6; n += 2) {
38913 for (size_t k = 1; k <= 5; k += 2) {
38914 for (uint32_t m = 1; m <= 4; m++) {
38915 GemmMicrokernelTester()
38916 .mr(4)
38917 .nr(2)
38918 .kr(1)
38919 .sr(1)
38920 .m(m)
38921 .n(n)
38922 .k(k)
38923 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038924 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038925 }
38926 }
38927 }
38928 }
38929
38930 TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, small_kernel) {
38931 for (size_t k = 1; k <= 5; k += 2) {
38932 GemmMicrokernelTester()
38933 .mr(4)
38934 .nr(2)
38935 .kr(1)
38936 .sr(1)
38937 .m(4)
38938 .n(2)
38939 .k(k)
38940 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080038941 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038942 }
38943 }
38944
38945 TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, small_kernel_subtile) {
38946 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080038947 for (uint32_t n = 1; n <= 2; n++) {
38948 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038949 GemmMicrokernelTester()
38950 .mr(4)
38951 .nr(2)
38952 .kr(1)
38953 .sr(1)
38954 .m(m)
38955 .n(n)
38956 .k(k)
38957 .ks(3)
38958 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038959 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038960 }
38961 }
38962 }
38963 }
38964
38965 TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_small_kernel) {
38966 for (uint32_t n = 3; n < 4; n++) {
38967 for (size_t k = 1; k <= 5; k += 2) {
38968 GemmMicrokernelTester()
38969 .mr(4)
38970 .nr(2)
38971 .kr(1)
38972 .sr(1)
38973 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080038974 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038975 .k(k)
38976 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080038977 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038978 }
38979 }
38980 }
38981
38982 TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_small_kernel) {
38983 for (uint32_t n = 4; n <= 6; n += 2) {
38984 for (size_t k = 1; k <= 5; k += 2) {
38985 GemmMicrokernelTester()
38986 .mr(4)
38987 .nr(2)
38988 .kr(1)
38989 .sr(1)
38990 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080038991 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038992 .k(k)
38993 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080038994 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080038995 }
38996 }
38997 }
38998
38999 TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cm_subtile) {
39000 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080039001 for (uint32_t n = 1; n <= 2; n++) {
39002 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039003 GemmMicrokernelTester()
39004 .mr(4)
39005 .nr(2)
39006 .kr(1)
39007 .sr(1)
39008 .m(m)
39009 .n(n)
39010 .k(k)
39011 .cm_stride(5)
39012 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039013 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039014 }
39015 }
39016 }
39017 }
39018
39019 TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, a_offset) {
39020 for (size_t k = 1; k <= 5; k += 2) {
39021 GemmMicrokernelTester()
39022 .mr(4)
39023 .nr(2)
39024 .kr(1)
39025 .sr(1)
39026 .m(4)
39027 .n(2)
39028 .k(k)
39029 .ks(3)
39030 .a_offset(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080039031 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039032 }
39033 }
39034
39035 TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080039036 for (size_t k = 1; k <= 5; k += 2) {
39037 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039038 GemmMicrokernelTester()
39039 .mr(4)
39040 .nr(2)
39041 .kr(1)
39042 .sr(1)
39043 .m(4)
39044 .n(2)
39045 .k(k)
39046 .ks(3)
39047 .a_offset(23)
39048 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080039049 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039050 }
39051 }
39052 }
39053
39054 TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, qmin) {
39055 GemmMicrokernelTester()
39056 .mr(4)
39057 .nr(2)
39058 .kr(1)
39059 .sr(1)
39060 .m(4)
39061 .n(2)
39062 .k(1)
39063 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080039064 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039065 }
39066
39067 TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, qmax) {
39068 GemmMicrokernelTester()
39069 .mr(4)
39070 .nr(2)
39071 .kr(1)
39072 .sr(1)
39073 .m(4)
39074 .n(2)
39075 .k(1)
39076 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080039077 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039078 }
39079
39080 TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cm) {
39081 GemmMicrokernelTester()
39082 .mr(4)
39083 .nr(2)
39084 .kr(1)
39085 .sr(1)
39086 .m(4)
39087 .n(2)
39088 .k(1)
39089 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080039090 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039091 }
39092#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
39093
39094
39095#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
39096 TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1) {
39097 GemmMicrokernelTester()
39098 .mr(1)
39099 .nr(4)
39100 .kr(1)
39101 .sr(1)
39102 .m(1)
39103 .n(4)
39104 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039105 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039106 }
39107
39108 TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cn) {
39109 GemmMicrokernelTester()
39110 .mr(1)
39111 .nr(4)
39112 .kr(1)
39113 .sr(1)
39114 .m(1)
39115 .n(4)
39116 .k(1)
39117 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080039118 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039119 }
39120
39121 TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080039122 for (uint32_t n = 1; n <= 4; n++) {
39123 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039124 GemmMicrokernelTester()
39125 .mr(1)
39126 .nr(4)
39127 .kr(1)
39128 .sr(1)
39129 .m(m)
39130 .n(n)
39131 .k(1)
39132 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039133 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039134 }
39135 }
39136 }
39137
39138 TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile_m) {
39139 for (uint32_t m = 1; m <= 1; m++) {
39140 GemmMicrokernelTester()
39141 .mr(1)
39142 .nr(4)
39143 .kr(1)
39144 .sr(1)
39145 .m(m)
39146 .n(4)
39147 .k(1)
39148 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039149 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039150 }
39151 }
39152
39153 TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile_n) {
39154 for (uint32_t n = 1; n <= 4; n++) {
39155 GemmMicrokernelTester()
39156 .mr(1)
39157 .nr(4)
39158 .kr(1)
39159 .sr(1)
39160 .m(1)
39161 .n(n)
39162 .k(1)
39163 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039164 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039165 }
39166 }
39167
39168 TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_gt_1) {
39169 for (size_t k = 2; k < 10; k++) {
39170 GemmMicrokernelTester()
39171 .mr(1)
39172 .nr(4)
39173 .kr(1)
39174 .sr(1)
39175 .m(1)
39176 .n(4)
39177 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080039178 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039179 }
39180 }
39181
39182 TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_gt_1_subtile) {
39183 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080039184 for (uint32_t n = 1; n <= 4; n++) {
39185 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039186 GemmMicrokernelTester()
39187 .mr(1)
39188 .nr(4)
39189 .kr(1)
39190 .sr(1)
39191 .m(m)
39192 .n(n)
39193 .k(k)
39194 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039195 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039196 }
39197 }
39198 }
39199 }
39200
39201 TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4) {
39202 for (uint32_t n = 5; n < 8; n++) {
39203 for (size_t k = 1; k <= 5; k += 2) {
39204 GemmMicrokernelTester()
39205 .mr(1)
39206 .nr(4)
39207 .kr(1)
39208 .sr(1)
39209 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080039210 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039211 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080039212 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039213 }
39214 }
39215 }
39216
39217 TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_strided_cn) {
39218 for (uint32_t n = 5; n < 8; n++) {
39219 for (size_t k = 1; k <= 5; k += 2) {
39220 GemmMicrokernelTester()
39221 .mr(1)
39222 .nr(4)
39223 .kr(1)
39224 .sr(1)
39225 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080039226 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039227 .k(k)
39228 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080039229 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039230 }
39231 }
39232 }
39233
39234 TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_subtile) {
39235 for (uint32_t n = 5; n < 8; n++) {
39236 for (size_t k = 1; k <= 5; k += 2) {
39237 for (uint32_t m = 1; m <= 1; m++) {
39238 GemmMicrokernelTester()
39239 .mr(1)
39240 .nr(4)
39241 .kr(1)
39242 .sr(1)
39243 .m(m)
39244 .n(n)
39245 .k(k)
39246 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039247 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039248 }
39249 }
39250 }
39251 }
39252
39253 TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4) {
39254 for (uint32_t n = 8; n <= 12; n += 4) {
39255 for (size_t k = 1; k <= 5; k += 2) {
39256 GemmMicrokernelTester()
39257 .mr(1)
39258 .nr(4)
39259 .kr(1)
39260 .sr(1)
39261 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080039262 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039263 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080039264 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039265 }
39266 }
39267 }
39268
39269 TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_strided_cn) {
39270 for (uint32_t n = 8; n <= 12; n += 4) {
39271 for (size_t k = 1; k <= 5; k += 2) {
39272 GemmMicrokernelTester()
39273 .mr(1)
39274 .nr(4)
39275 .kr(1)
39276 .sr(1)
39277 .m(1)
39278 .n(n)
39279 .k(k)
39280 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080039281 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039282 }
39283 }
39284 }
39285
39286 TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_subtile) {
39287 for (uint32_t n = 8; n <= 12; n += 4) {
39288 for (size_t k = 1; k <= 5; k += 2) {
39289 for (uint32_t m = 1; m <= 1; m++) {
39290 GemmMicrokernelTester()
39291 .mr(1)
39292 .nr(4)
39293 .kr(1)
39294 .sr(1)
39295 .m(m)
39296 .n(n)
39297 .k(k)
39298 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039299 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039300 }
39301 }
39302 }
39303 }
39304
39305 TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, small_kernel) {
39306 for (size_t k = 1; k <= 5; k += 2) {
39307 GemmMicrokernelTester()
39308 .mr(1)
39309 .nr(4)
39310 .kr(1)
39311 .sr(1)
39312 .m(1)
39313 .n(4)
39314 .k(k)
39315 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080039316 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039317 }
39318 }
39319
39320 TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, small_kernel_subtile) {
39321 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080039322 for (uint32_t n = 1; n <= 4; n++) {
39323 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039324 GemmMicrokernelTester()
39325 .mr(1)
39326 .nr(4)
39327 .kr(1)
39328 .sr(1)
39329 .m(m)
39330 .n(n)
39331 .k(k)
39332 .ks(3)
39333 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039334 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039335 }
39336 }
39337 }
39338 }
39339
39340 TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_small_kernel) {
39341 for (uint32_t n = 5; n < 8; n++) {
39342 for (size_t k = 1; k <= 5; k += 2) {
39343 GemmMicrokernelTester()
39344 .mr(1)
39345 .nr(4)
39346 .kr(1)
39347 .sr(1)
39348 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080039349 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039350 .k(k)
39351 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080039352 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039353 }
39354 }
39355 }
39356
39357 TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_small_kernel) {
39358 for (uint32_t n = 8; n <= 12; n += 4) {
39359 for (size_t k = 1; k <= 5; k += 2) {
39360 GemmMicrokernelTester()
39361 .mr(1)
39362 .nr(4)
39363 .kr(1)
39364 .sr(1)
39365 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080039366 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039367 .k(k)
39368 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080039369 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039370 }
39371 }
39372 }
39373
39374 TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cm_subtile) {
39375 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080039376 for (uint32_t n = 1; n <= 4; n++) {
39377 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039378 GemmMicrokernelTester()
39379 .mr(1)
39380 .nr(4)
39381 .kr(1)
39382 .sr(1)
39383 .m(m)
39384 .n(n)
39385 .k(k)
39386 .cm_stride(7)
39387 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039388 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039389 }
39390 }
39391 }
39392 }
39393
39394 TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, a_offset) {
39395 for (size_t k = 1; k <= 5; k += 2) {
39396 GemmMicrokernelTester()
39397 .mr(1)
39398 .nr(4)
39399 .kr(1)
39400 .sr(1)
39401 .m(1)
39402 .n(4)
39403 .k(k)
39404 .ks(3)
39405 .a_offset(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080039406 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039407 }
39408 }
39409
39410 TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080039411 for (size_t k = 1; k <= 5; k += 2) {
39412 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039413 GemmMicrokernelTester()
39414 .mr(1)
39415 .nr(4)
39416 .kr(1)
39417 .sr(1)
39418 .m(1)
39419 .n(4)
39420 .k(k)
39421 .ks(3)
39422 .a_offset(7)
39423 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080039424 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039425 }
39426 }
39427 }
39428
39429 TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, qmin) {
39430 GemmMicrokernelTester()
39431 .mr(1)
39432 .nr(4)
39433 .kr(1)
39434 .sr(1)
39435 .m(1)
39436 .n(4)
39437 .k(1)
39438 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080039439 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039440 }
39441
39442 TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, qmax) {
39443 GemmMicrokernelTester()
39444 .mr(1)
39445 .nr(4)
39446 .kr(1)
39447 .sr(1)
39448 .m(1)
39449 .n(4)
39450 .k(1)
39451 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080039452 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039453 }
39454
39455 TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cm) {
39456 GemmMicrokernelTester()
39457 .mr(1)
39458 .nr(4)
39459 .kr(1)
39460 .sr(1)
39461 .m(1)
39462 .n(4)
39463 .k(1)
39464 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080039465 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039466 }
39467#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
39468
39469
39470#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
39471 TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1) {
39472 GemmMicrokernelTester()
39473 .mr(2)
39474 .nr(4)
39475 .kr(1)
39476 .sr(1)
39477 .m(2)
39478 .n(4)
39479 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039480 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039481 }
39482
39483 TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cn) {
39484 GemmMicrokernelTester()
39485 .mr(2)
39486 .nr(4)
39487 .kr(1)
39488 .sr(1)
39489 .m(2)
39490 .n(4)
39491 .k(1)
39492 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080039493 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039494 }
39495
39496 TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080039497 for (uint32_t n = 1; n <= 4; n++) {
39498 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039499 GemmMicrokernelTester()
39500 .mr(2)
39501 .nr(4)
39502 .kr(1)
39503 .sr(1)
39504 .m(m)
39505 .n(n)
39506 .k(1)
39507 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039508 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039509 }
39510 }
39511 }
39512
39513 TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile_m) {
39514 for (uint32_t m = 1; m <= 2; m++) {
39515 GemmMicrokernelTester()
39516 .mr(2)
39517 .nr(4)
39518 .kr(1)
39519 .sr(1)
39520 .m(m)
39521 .n(4)
39522 .k(1)
39523 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039524 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039525 }
39526 }
39527
39528 TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile_n) {
39529 for (uint32_t n = 1; n <= 4; n++) {
39530 GemmMicrokernelTester()
39531 .mr(2)
39532 .nr(4)
39533 .kr(1)
39534 .sr(1)
39535 .m(2)
39536 .n(n)
39537 .k(1)
39538 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039539 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039540 }
39541 }
39542
39543 TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_gt_1) {
39544 for (size_t k = 2; k < 10; k++) {
39545 GemmMicrokernelTester()
39546 .mr(2)
39547 .nr(4)
39548 .kr(1)
39549 .sr(1)
39550 .m(2)
39551 .n(4)
39552 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080039553 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039554 }
39555 }
39556
39557 TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_gt_1_subtile) {
39558 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080039559 for (uint32_t n = 1; n <= 4; n++) {
39560 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039561 GemmMicrokernelTester()
39562 .mr(2)
39563 .nr(4)
39564 .kr(1)
39565 .sr(1)
39566 .m(m)
39567 .n(n)
39568 .k(k)
39569 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039570 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039571 }
39572 }
39573 }
39574 }
39575
39576 TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4) {
39577 for (uint32_t n = 5; n < 8; n++) {
39578 for (size_t k = 1; k <= 5; k += 2) {
39579 GemmMicrokernelTester()
39580 .mr(2)
39581 .nr(4)
39582 .kr(1)
39583 .sr(1)
39584 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080039585 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039586 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080039587 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039588 }
39589 }
39590 }
39591
39592 TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_strided_cn) {
39593 for (uint32_t n = 5; n < 8; n++) {
39594 for (size_t k = 1; k <= 5; k += 2) {
39595 GemmMicrokernelTester()
39596 .mr(2)
39597 .nr(4)
39598 .kr(1)
39599 .sr(1)
39600 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080039601 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039602 .k(k)
39603 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080039604 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039605 }
39606 }
39607 }
39608
39609 TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_subtile) {
39610 for (uint32_t n = 5; n < 8; n++) {
39611 for (size_t k = 1; k <= 5; k += 2) {
39612 for (uint32_t m = 1; m <= 2; m++) {
39613 GemmMicrokernelTester()
39614 .mr(2)
39615 .nr(4)
39616 .kr(1)
39617 .sr(1)
39618 .m(m)
39619 .n(n)
39620 .k(k)
39621 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039622 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039623 }
39624 }
39625 }
39626 }
39627
39628 TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4) {
39629 for (uint32_t n = 8; n <= 12; n += 4) {
39630 for (size_t k = 1; k <= 5; k += 2) {
39631 GemmMicrokernelTester()
39632 .mr(2)
39633 .nr(4)
39634 .kr(1)
39635 .sr(1)
39636 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080039637 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039638 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080039639 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039640 }
39641 }
39642 }
39643
39644 TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_strided_cn) {
39645 for (uint32_t n = 8; n <= 12; n += 4) {
39646 for (size_t k = 1; k <= 5; k += 2) {
39647 GemmMicrokernelTester()
39648 .mr(2)
39649 .nr(4)
39650 .kr(1)
39651 .sr(1)
39652 .m(2)
39653 .n(n)
39654 .k(k)
39655 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080039656 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039657 }
39658 }
39659 }
39660
39661 TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_subtile) {
39662 for (uint32_t n = 8; n <= 12; n += 4) {
39663 for (size_t k = 1; k <= 5; k += 2) {
39664 for (uint32_t m = 1; m <= 2; m++) {
39665 GemmMicrokernelTester()
39666 .mr(2)
39667 .nr(4)
39668 .kr(1)
39669 .sr(1)
39670 .m(m)
39671 .n(n)
39672 .k(k)
39673 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039674 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039675 }
39676 }
39677 }
39678 }
39679
39680 TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, small_kernel) {
39681 for (size_t k = 1; k <= 5; k += 2) {
39682 GemmMicrokernelTester()
39683 .mr(2)
39684 .nr(4)
39685 .kr(1)
39686 .sr(1)
39687 .m(2)
39688 .n(4)
39689 .k(k)
39690 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080039691 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039692 }
39693 }
39694
39695 TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, small_kernel_subtile) {
39696 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080039697 for (uint32_t n = 1; n <= 4; n++) {
39698 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039699 GemmMicrokernelTester()
39700 .mr(2)
39701 .nr(4)
39702 .kr(1)
39703 .sr(1)
39704 .m(m)
39705 .n(n)
39706 .k(k)
39707 .ks(3)
39708 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039709 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039710 }
39711 }
39712 }
39713 }
39714
39715 TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_small_kernel) {
39716 for (uint32_t n = 5; n < 8; n++) {
39717 for (size_t k = 1; k <= 5; k += 2) {
39718 GemmMicrokernelTester()
39719 .mr(2)
39720 .nr(4)
39721 .kr(1)
39722 .sr(1)
39723 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080039724 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039725 .k(k)
39726 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080039727 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039728 }
39729 }
39730 }
39731
39732 TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_small_kernel) {
39733 for (uint32_t n = 8; n <= 12; n += 4) {
39734 for (size_t k = 1; k <= 5; k += 2) {
39735 GemmMicrokernelTester()
39736 .mr(2)
39737 .nr(4)
39738 .kr(1)
39739 .sr(1)
39740 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080039741 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039742 .k(k)
39743 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080039744 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039745 }
39746 }
39747 }
39748
39749 TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cm_subtile) {
39750 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080039751 for (uint32_t n = 1; n <= 4; n++) {
39752 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039753 GemmMicrokernelTester()
39754 .mr(2)
39755 .nr(4)
39756 .kr(1)
39757 .sr(1)
39758 .m(m)
39759 .n(n)
39760 .k(k)
39761 .cm_stride(7)
39762 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039763 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039764 }
39765 }
39766 }
39767 }
39768
39769 TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, a_offset) {
39770 for (size_t k = 1; k <= 5; k += 2) {
39771 GemmMicrokernelTester()
39772 .mr(2)
39773 .nr(4)
39774 .kr(1)
39775 .sr(1)
39776 .m(2)
39777 .n(4)
39778 .k(k)
39779 .ks(3)
39780 .a_offset(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080039781 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039782 }
39783 }
39784
39785 TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080039786 for (size_t k = 1; k <= 5; k += 2) {
39787 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039788 GemmMicrokernelTester()
39789 .mr(2)
39790 .nr(4)
39791 .kr(1)
39792 .sr(1)
39793 .m(2)
39794 .n(4)
39795 .k(k)
39796 .ks(3)
39797 .a_offset(13)
39798 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080039799 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039800 }
39801 }
39802 }
39803
39804 TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, qmin) {
39805 GemmMicrokernelTester()
39806 .mr(2)
39807 .nr(4)
39808 .kr(1)
39809 .sr(1)
39810 .m(2)
39811 .n(4)
39812 .k(1)
39813 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080039814 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039815 }
39816
39817 TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, qmax) {
39818 GemmMicrokernelTester()
39819 .mr(2)
39820 .nr(4)
39821 .kr(1)
39822 .sr(1)
39823 .m(2)
39824 .n(4)
39825 .k(1)
39826 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080039827 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039828 }
39829
39830 TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cm) {
39831 GemmMicrokernelTester()
39832 .mr(2)
39833 .nr(4)
39834 .kr(1)
39835 .sr(1)
39836 .m(2)
39837 .n(4)
39838 .k(1)
39839 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080039840 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039841 }
39842#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
39843
39844
39845#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039846 TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1) {
39847 GemmMicrokernelTester()
39848 .mr(4)
39849 .nr(4)
39850 .kr(1)
39851 .sr(1)
39852 .m(4)
39853 .n(4)
39854 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039855 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039856 }
39857
39858 TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cn) {
39859 GemmMicrokernelTester()
39860 .mr(4)
39861 .nr(4)
39862 .kr(1)
39863 .sr(1)
39864 .m(4)
39865 .n(4)
39866 .k(1)
39867 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080039868 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039869 }
39870
39871 TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080039872 for (uint32_t n = 1; n <= 4; n++) {
39873 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039874 GemmMicrokernelTester()
39875 .mr(4)
39876 .nr(4)
39877 .kr(1)
39878 .sr(1)
39879 .m(m)
39880 .n(n)
39881 .k(1)
39882 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039883 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039884 }
39885 }
39886 }
39887
39888 TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile_m) {
39889 for (uint32_t m = 1; m <= 4; m++) {
39890 GemmMicrokernelTester()
39891 .mr(4)
39892 .nr(4)
39893 .kr(1)
39894 .sr(1)
39895 .m(m)
39896 .n(4)
39897 .k(1)
39898 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039899 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039900 }
39901 }
39902
39903 TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile_n) {
39904 for (uint32_t n = 1; n <= 4; n++) {
39905 GemmMicrokernelTester()
39906 .mr(4)
39907 .nr(4)
39908 .kr(1)
39909 .sr(1)
39910 .m(4)
39911 .n(n)
39912 .k(1)
39913 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039914 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039915 }
39916 }
39917
39918 TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_gt_1) {
39919 for (size_t k = 2; k < 10; k++) {
39920 GemmMicrokernelTester()
39921 .mr(4)
39922 .nr(4)
39923 .kr(1)
39924 .sr(1)
39925 .m(4)
39926 .n(4)
39927 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080039928 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039929 }
39930 }
39931
39932 TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_gt_1_subtile) {
39933 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080039934 for (uint32_t n = 1; n <= 4; n++) {
39935 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039936 GemmMicrokernelTester()
39937 .mr(4)
39938 .nr(4)
39939 .kr(1)
39940 .sr(1)
39941 .m(m)
39942 .n(n)
39943 .k(k)
39944 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039945 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039946 }
39947 }
39948 }
39949 }
39950
39951 TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4) {
39952 for (uint32_t n = 5; n < 8; n++) {
39953 for (size_t k = 1; k <= 5; k += 2) {
39954 GemmMicrokernelTester()
39955 .mr(4)
39956 .nr(4)
39957 .kr(1)
39958 .sr(1)
39959 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080039960 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039961 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080039962 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039963 }
39964 }
39965 }
39966
39967 TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_strided_cn) {
39968 for (uint32_t n = 5; n < 8; n++) {
39969 for (size_t k = 1; k <= 5; k += 2) {
39970 GemmMicrokernelTester()
39971 .mr(4)
39972 .nr(4)
39973 .kr(1)
39974 .sr(1)
39975 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080039976 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039977 .k(k)
39978 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080039979 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039980 }
39981 }
39982 }
39983
39984 TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_subtile) {
39985 for (uint32_t n = 5; n < 8; n++) {
39986 for (size_t k = 1; k <= 5; k += 2) {
39987 for (uint32_t m = 1; m <= 4; m++) {
39988 GemmMicrokernelTester()
39989 .mr(4)
39990 .nr(4)
39991 .kr(1)
39992 .sr(1)
39993 .m(m)
39994 .n(n)
39995 .k(k)
39996 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039997 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080039998 }
39999 }
40000 }
40001 }
40002
40003 TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4) {
40004 for (uint32_t n = 8; n <= 12; n += 4) {
40005 for (size_t k = 1; k <= 5; k += 2) {
40006 GemmMicrokernelTester()
40007 .mr(4)
40008 .nr(4)
40009 .kr(1)
40010 .sr(1)
40011 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080040012 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080040013 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080040014 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080040015 }
40016 }
40017 }
40018
40019 TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_strided_cn) {
40020 for (uint32_t n = 8; n <= 12; n += 4) {
40021 for (size_t k = 1; k <= 5; k += 2) {
40022 GemmMicrokernelTester()
40023 .mr(4)
40024 .nr(4)
40025 .kr(1)
40026 .sr(1)
40027 .m(4)
40028 .n(n)
40029 .k(k)
40030 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080040031 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080040032 }
40033 }
40034 }
40035
40036 TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_subtile) {
40037 for (uint32_t n = 8; n <= 12; n += 4) {
40038 for (size_t k = 1; k <= 5; k += 2) {
40039 for (uint32_t m = 1; m <= 4; m++) {
40040 GemmMicrokernelTester()
40041 .mr(4)
40042 .nr(4)
40043 .kr(1)
40044 .sr(1)
40045 .m(m)
40046 .n(n)
40047 .k(k)
40048 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040049 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080040050 }
40051 }
40052 }
40053 }
40054
40055 TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, small_kernel) {
40056 for (size_t k = 1; k <= 5; k += 2) {
40057 GemmMicrokernelTester()
40058 .mr(4)
40059 .nr(4)
40060 .kr(1)
40061 .sr(1)
40062 .m(4)
40063 .n(4)
40064 .k(k)
40065 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080040066 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080040067 }
40068 }
40069
40070 TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, small_kernel_subtile) {
40071 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080040072 for (uint32_t n = 1; n <= 4; n++) {
40073 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080040074 GemmMicrokernelTester()
40075 .mr(4)
40076 .nr(4)
40077 .kr(1)
40078 .sr(1)
40079 .m(m)
40080 .n(n)
40081 .k(k)
40082 .ks(3)
40083 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040084 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080040085 }
40086 }
40087 }
40088 }
40089
40090 TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_small_kernel) {
40091 for (uint32_t n = 5; n < 8; n++) {
40092 for (size_t k = 1; k <= 5; k += 2) {
40093 GemmMicrokernelTester()
40094 .mr(4)
40095 .nr(4)
40096 .kr(1)
40097 .sr(1)
40098 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080040099 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080040100 .k(k)
40101 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080040102 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080040103 }
40104 }
40105 }
40106
40107 TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_small_kernel) {
40108 for (uint32_t n = 8; n <= 12; n += 4) {
40109 for (size_t k = 1; k <= 5; k += 2) {
40110 GemmMicrokernelTester()
40111 .mr(4)
40112 .nr(4)
40113 .kr(1)
40114 .sr(1)
40115 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080040116 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080040117 .k(k)
40118 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080040119 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080040120 }
40121 }
40122 }
40123
40124 TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cm_subtile) {
40125 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080040126 for (uint32_t n = 1; n <= 4; n++) {
40127 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080040128 GemmMicrokernelTester()
40129 .mr(4)
40130 .nr(4)
40131 .kr(1)
40132 .sr(1)
40133 .m(m)
40134 .n(n)
40135 .k(k)
40136 .cm_stride(7)
40137 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040138 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080040139 }
40140 }
40141 }
40142 }
40143
40144 TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, a_offset) {
40145 for (size_t k = 1; k <= 5; k += 2) {
40146 GemmMicrokernelTester()
40147 .mr(4)
40148 .nr(4)
40149 .kr(1)
40150 .sr(1)
40151 .m(4)
40152 .n(4)
40153 .k(k)
40154 .ks(3)
40155 .a_offset(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080040156 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080040157 }
40158 }
40159
40160 TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080040161 for (size_t k = 1; k <= 5; k += 2) {
40162 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080040163 GemmMicrokernelTester()
40164 .mr(4)
40165 .nr(4)
40166 .kr(1)
40167 .sr(1)
40168 .m(4)
40169 .n(4)
40170 .k(k)
40171 .ks(3)
40172 .a_offset(23)
40173 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080040174 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080040175 }
40176 }
40177 }
40178
40179 TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, qmin) {
40180 GemmMicrokernelTester()
40181 .mr(4)
40182 .nr(4)
40183 .kr(1)
40184 .sr(1)
40185 .m(4)
40186 .n(4)
40187 .k(1)
40188 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080040189 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080040190 }
40191
40192 TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, qmax) {
40193 GemmMicrokernelTester()
40194 .mr(4)
40195 .nr(4)
40196 .kr(1)
40197 .sr(1)
40198 .m(4)
40199 .n(4)
40200 .k(1)
40201 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080040202 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080040203 }
40204
40205 TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cm) {
40206 GemmMicrokernelTester()
40207 .mr(4)
40208 .nr(4)
40209 .kr(1)
40210 .sr(1)
40211 .m(4)
40212 .n(4)
40213 .k(1)
40214 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080040215 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080040216 }
40217#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
40218
40219
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040220TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040221 GemmMicrokernelTester()
40222 .mr(3)
40223 .nr(2)
40224 .kr(1)
40225 .sr(1)
40226 .m(3)
40227 .n(2)
40228 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040229 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040230}
40231
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040232TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, strided_cn) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040233 GemmMicrokernelTester()
40234 .mr(3)
40235 .nr(2)
40236 .kr(1)
40237 .sr(1)
40238 .m(3)
40239 .n(2)
40240 .k(1)
40241 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080040242 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040243}
40244
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040245TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080040246 for (uint32_t n = 1; n <= 2; n++) {
40247 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040248 GemmMicrokernelTester()
40249 .mr(3)
40250 .nr(2)
40251 .kr(1)
40252 .sr(1)
40253 .m(m)
40254 .n(n)
40255 .k(1)
40256 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040257 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040258 }
40259 }
40260}
40261
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040262TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040263 for (uint32_t m = 1; m <= 3; m++) {
40264 GemmMicrokernelTester()
40265 .mr(3)
40266 .nr(2)
40267 .kr(1)
40268 .sr(1)
40269 .m(m)
40270 .n(2)
40271 .k(1)
40272 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040273 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040274 }
40275}
40276
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040277TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040278 for (uint32_t n = 1; n <= 2; n++) {
40279 GemmMicrokernelTester()
40280 .mr(3)
40281 .nr(2)
40282 .kr(1)
40283 .sr(1)
40284 .m(3)
40285 .n(n)
40286 .k(1)
40287 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040288 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040289 }
40290}
40291
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040292TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_gt_1) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040293 for (size_t k = 2; k < 10; k++) {
40294 GemmMicrokernelTester()
40295 .mr(3)
40296 .nr(2)
40297 .kr(1)
40298 .sr(1)
40299 .m(3)
40300 .n(2)
40301 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080040302 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040303 }
40304}
40305
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040306TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_gt_1_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040307 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080040308 for (uint32_t n = 1; n <= 2; n++) {
40309 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040310 GemmMicrokernelTester()
40311 .mr(3)
40312 .nr(2)
40313 .kr(1)
40314 .sr(1)
40315 .m(m)
40316 .n(n)
40317 .k(k)
40318 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040319 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040320 }
40321 }
40322 }
40323}
40324
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040325TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040326 for (uint32_t n = 3; n < 4; n++) {
40327 for (size_t k = 1; k <= 5; k += 2) {
40328 GemmMicrokernelTester()
40329 .mr(3)
40330 .nr(2)
40331 .kr(1)
40332 .sr(1)
40333 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080040334 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070040335 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080040336 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040337 }
40338 }
40339}
40340
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040341TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040342 for (uint32_t n = 3; n < 4; n++) {
40343 for (size_t k = 1; k <= 5; k += 2) {
40344 GemmMicrokernelTester()
40345 .mr(3)
40346 .nr(2)
40347 .kr(1)
40348 .sr(1)
40349 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080040350 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070040351 .k(k)
40352 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080040353 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040354 }
40355 }
40356}
40357
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040358TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040359 for (uint32_t n = 3; n < 4; n++) {
40360 for (size_t k = 1; k <= 5; k += 2) {
40361 for (uint32_t m = 1; m <= 3; m++) {
40362 GemmMicrokernelTester()
40363 .mr(3)
40364 .nr(2)
40365 .kr(1)
40366 .sr(1)
40367 .m(m)
40368 .n(n)
40369 .k(k)
40370 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040371 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040372 }
40373 }
40374 }
40375}
40376
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040377TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040378 for (uint32_t n = 4; n <= 6; n += 2) {
40379 for (size_t k = 1; k <= 5; k += 2) {
40380 GemmMicrokernelTester()
40381 .mr(3)
40382 .nr(2)
40383 .kr(1)
40384 .sr(1)
40385 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080040386 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070040387 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080040388 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040389 }
40390 }
40391}
40392
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040393TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040394 for (uint32_t n = 4; n <= 6; n += 2) {
40395 for (size_t k = 1; k <= 5; k += 2) {
40396 GemmMicrokernelTester()
40397 .mr(3)
40398 .nr(2)
40399 .kr(1)
40400 .sr(1)
40401 .m(3)
40402 .n(n)
40403 .k(k)
40404 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080040405 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040406 }
40407 }
40408}
40409
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040410TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040411 for (uint32_t n = 4; n <= 6; n += 2) {
40412 for (size_t k = 1; k <= 5; k += 2) {
40413 for (uint32_t m = 1; m <= 3; m++) {
40414 GemmMicrokernelTester()
40415 .mr(3)
40416 .nr(2)
40417 .kr(1)
40418 .sr(1)
40419 .m(m)
40420 .n(n)
40421 .k(k)
40422 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040423 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040424 }
40425 }
40426 }
40427}
40428
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040429TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, small_kernel) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040430 for (size_t k = 1; k <= 5; k += 2) {
40431 GemmMicrokernelTester()
40432 .mr(3)
40433 .nr(2)
40434 .kr(1)
40435 .sr(1)
40436 .m(3)
40437 .n(2)
40438 .k(k)
40439 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080040440 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040441 }
40442}
40443
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040444TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, small_kernel_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040445 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080040446 for (uint32_t n = 1; n <= 2; n++) {
40447 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040448 GemmMicrokernelTester()
40449 .mr(3)
40450 .nr(2)
40451 .kr(1)
40452 .sr(1)
40453 .m(m)
40454 .n(n)
40455 .k(k)
40456 .ks(3)
40457 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040458 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040459 }
40460 }
40461 }
40462}
40463
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040464TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2_small_kernel) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040465 for (uint32_t n = 3; n < 4; n++) {
40466 for (size_t k = 1; k <= 5; k += 2) {
40467 GemmMicrokernelTester()
40468 .mr(3)
40469 .nr(2)
40470 .kr(1)
40471 .sr(1)
40472 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080040473 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070040474 .k(k)
40475 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080040476 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040477 }
40478 }
40479}
40480
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040481TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2_small_kernel) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040482 for (uint32_t n = 4; n <= 6; n += 2) {
40483 for (size_t k = 1; k <= 5; k += 2) {
40484 GemmMicrokernelTester()
40485 .mr(3)
40486 .nr(2)
40487 .kr(1)
40488 .sr(1)
40489 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080040490 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070040491 .k(k)
40492 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080040493 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040494 }
40495 }
40496}
40497
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040498TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, strided_cm_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040499 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080040500 for (uint32_t n = 1; n <= 2; n++) {
40501 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040502 GemmMicrokernelTester()
40503 .mr(3)
40504 .nr(2)
40505 .kr(1)
40506 .sr(1)
40507 .m(m)
40508 .n(n)
40509 .k(k)
40510 .cm_stride(5)
40511 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040512 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040513 }
40514 }
40515 }
40516}
40517
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040518TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, a_offset) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040519 for (size_t k = 1; k <= 5; k += 2) {
40520 GemmMicrokernelTester()
40521 .mr(3)
40522 .nr(2)
40523 .kr(1)
40524 .sr(1)
40525 .m(3)
40526 .n(2)
40527 .k(k)
40528 .ks(3)
40529 .a_offset(17)
Marat Dukhan50323b82022-01-11 00:12:01 -080040530 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040531 }
40532}
40533
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040534TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080040535 for (size_t k = 1; k <= 5; k += 2) {
40536 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040537 GemmMicrokernelTester()
40538 .mr(3)
40539 .nr(2)
40540 .kr(1)
40541 .sr(1)
40542 .m(3)
40543 .n(2)
40544 .k(k)
40545 .ks(3)
40546 .a_offset(17)
40547 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080040548 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040549 }
40550 }
40551}
40552
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040553TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, qmin) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040554 GemmMicrokernelTester()
40555 .mr(3)
40556 .nr(2)
40557 .kr(1)
40558 .sr(1)
40559 .m(3)
40560 .n(2)
40561 .k(1)
40562 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080040563 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040564}
40565
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040566TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, qmax) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040567 GemmMicrokernelTester()
40568 .mr(3)
40569 .nr(2)
40570 .kr(1)
40571 .sr(1)
40572 .m(3)
40573 .n(2)
40574 .k(1)
40575 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080040576 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040577}
40578
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040579TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, strided_cm) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040580 GemmMicrokernelTester()
40581 .mr(3)
40582 .nr(2)
40583 .kr(1)
40584 .sr(1)
40585 .m(3)
40586 .n(2)
40587 .k(1)
40588 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080040589 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040590}
40591
40592
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040593TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040594 GemmMicrokernelTester()
40595 .mr(4)
40596 .nr(2)
40597 .kr(1)
40598 .sr(1)
40599 .m(4)
40600 .n(2)
40601 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040602 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040603}
40604
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040605TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, strided_cn) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040606 GemmMicrokernelTester()
40607 .mr(4)
40608 .nr(2)
40609 .kr(1)
40610 .sr(1)
40611 .m(4)
40612 .n(2)
40613 .k(1)
40614 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080040615 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040616}
40617
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040618TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080040619 for (uint32_t n = 1; n <= 2; n++) {
40620 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040621 GemmMicrokernelTester()
40622 .mr(4)
40623 .nr(2)
40624 .kr(1)
40625 .sr(1)
40626 .m(m)
40627 .n(n)
40628 .k(1)
40629 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040630 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040631 }
40632 }
40633}
40634
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040635TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040636 for (uint32_t m = 1; m <= 4; m++) {
40637 GemmMicrokernelTester()
40638 .mr(4)
40639 .nr(2)
40640 .kr(1)
40641 .sr(1)
40642 .m(m)
40643 .n(2)
40644 .k(1)
40645 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040646 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040647 }
40648}
40649
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040650TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040651 for (uint32_t n = 1; n <= 2; n++) {
40652 GemmMicrokernelTester()
40653 .mr(4)
40654 .nr(2)
40655 .kr(1)
40656 .sr(1)
40657 .m(4)
40658 .n(n)
40659 .k(1)
40660 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040661 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040662 }
40663}
40664
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040665TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_gt_1) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040666 for (size_t k = 2; k < 10; k++) {
40667 GemmMicrokernelTester()
40668 .mr(4)
40669 .nr(2)
40670 .kr(1)
40671 .sr(1)
40672 .m(4)
40673 .n(2)
40674 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080040675 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040676 }
40677}
40678
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040679TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_gt_1_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040680 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080040681 for (uint32_t n = 1; n <= 2; n++) {
40682 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040683 GemmMicrokernelTester()
40684 .mr(4)
40685 .nr(2)
40686 .kr(1)
40687 .sr(1)
40688 .m(m)
40689 .n(n)
40690 .k(k)
40691 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040692 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040693 }
40694 }
40695 }
40696}
40697
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040698TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040699 for (uint32_t n = 3; n < 4; n++) {
40700 for (size_t k = 1; k <= 5; k += 2) {
40701 GemmMicrokernelTester()
40702 .mr(4)
40703 .nr(2)
40704 .kr(1)
40705 .sr(1)
40706 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080040707 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070040708 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080040709 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040710 }
40711 }
40712}
40713
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040714TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040715 for (uint32_t n = 3; n < 4; n++) {
40716 for (size_t k = 1; k <= 5; k += 2) {
40717 GemmMicrokernelTester()
40718 .mr(4)
40719 .nr(2)
40720 .kr(1)
40721 .sr(1)
40722 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080040723 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070040724 .k(k)
40725 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080040726 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040727 }
40728 }
40729}
40730
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040731TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040732 for (uint32_t n = 3; n < 4; n++) {
40733 for (size_t k = 1; k <= 5; k += 2) {
40734 for (uint32_t m = 1; m <= 4; m++) {
40735 GemmMicrokernelTester()
40736 .mr(4)
40737 .nr(2)
40738 .kr(1)
40739 .sr(1)
40740 .m(m)
40741 .n(n)
40742 .k(k)
40743 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040744 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040745 }
40746 }
40747 }
40748}
40749
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040750TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040751 for (uint32_t n = 4; n <= 6; n += 2) {
40752 for (size_t k = 1; k <= 5; k += 2) {
40753 GemmMicrokernelTester()
40754 .mr(4)
40755 .nr(2)
40756 .kr(1)
40757 .sr(1)
40758 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080040759 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070040760 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080040761 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040762 }
40763 }
40764}
40765
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040766TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040767 for (uint32_t n = 4; n <= 6; n += 2) {
40768 for (size_t k = 1; k <= 5; k += 2) {
40769 GemmMicrokernelTester()
40770 .mr(4)
40771 .nr(2)
40772 .kr(1)
40773 .sr(1)
40774 .m(4)
40775 .n(n)
40776 .k(k)
40777 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080040778 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040779 }
40780 }
40781}
40782
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040783TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040784 for (uint32_t n = 4; n <= 6; n += 2) {
40785 for (size_t k = 1; k <= 5; k += 2) {
40786 for (uint32_t m = 1; m <= 4; m++) {
40787 GemmMicrokernelTester()
40788 .mr(4)
40789 .nr(2)
40790 .kr(1)
40791 .sr(1)
40792 .m(m)
40793 .n(n)
40794 .k(k)
40795 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040796 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040797 }
40798 }
40799 }
40800}
40801
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040802TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, small_kernel) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040803 for (size_t k = 1; k <= 5; k += 2) {
40804 GemmMicrokernelTester()
40805 .mr(4)
40806 .nr(2)
40807 .kr(1)
40808 .sr(1)
40809 .m(4)
40810 .n(2)
40811 .k(k)
40812 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080040813 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040814 }
40815}
40816
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040817TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, small_kernel_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040818 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080040819 for (uint32_t n = 1; n <= 2; n++) {
40820 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040821 GemmMicrokernelTester()
40822 .mr(4)
40823 .nr(2)
40824 .kr(1)
40825 .sr(1)
40826 .m(m)
40827 .n(n)
40828 .k(k)
40829 .ks(3)
40830 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040831 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040832 }
40833 }
40834 }
40835}
40836
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040837TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2_small_kernel) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040838 for (uint32_t n = 3; n < 4; n++) {
40839 for (size_t k = 1; k <= 5; k += 2) {
40840 GemmMicrokernelTester()
40841 .mr(4)
40842 .nr(2)
40843 .kr(1)
40844 .sr(1)
40845 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080040846 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070040847 .k(k)
40848 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080040849 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040850 }
40851 }
40852}
40853
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040854TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2_small_kernel) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040855 for (uint32_t n = 4; n <= 6; n += 2) {
40856 for (size_t k = 1; k <= 5; k += 2) {
40857 GemmMicrokernelTester()
40858 .mr(4)
40859 .nr(2)
40860 .kr(1)
40861 .sr(1)
40862 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080040863 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070040864 .k(k)
40865 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080040866 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040867 }
40868 }
40869}
40870
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040871TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, strided_cm_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040872 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080040873 for (uint32_t n = 1; n <= 2; n++) {
40874 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040875 GemmMicrokernelTester()
40876 .mr(4)
40877 .nr(2)
40878 .kr(1)
40879 .sr(1)
40880 .m(m)
40881 .n(n)
40882 .k(k)
40883 .cm_stride(5)
40884 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040885 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040886 }
40887 }
40888 }
40889}
40890
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040891TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, a_offset) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040892 for (size_t k = 1; k <= 5; k += 2) {
40893 GemmMicrokernelTester()
40894 .mr(4)
40895 .nr(2)
40896 .kr(1)
40897 .sr(1)
40898 .m(4)
40899 .n(2)
40900 .k(k)
40901 .ks(3)
40902 .a_offset(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080040903 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040904 }
40905}
40906
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040907TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080040908 for (size_t k = 1; k <= 5; k += 2) {
40909 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040910 GemmMicrokernelTester()
40911 .mr(4)
40912 .nr(2)
40913 .kr(1)
40914 .sr(1)
40915 .m(4)
40916 .n(2)
40917 .k(k)
40918 .ks(3)
40919 .a_offset(23)
40920 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080040921 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040922 }
40923 }
40924}
40925
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040926TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, qmin) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040927 GemmMicrokernelTester()
40928 .mr(4)
40929 .nr(2)
40930 .kr(1)
40931 .sr(1)
40932 .m(4)
40933 .n(2)
40934 .k(1)
40935 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080040936 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040937}
40938
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040939TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, qmax) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040940 GemmMicrokernelTester()
40941 .mr(4)
40942 .nr(2)
40943 .kr(1)
40944 .sr(1)
40945 .m(4)
40946 .n(2)
40947 .k(1)
40948 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080040949 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040950}
40951
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040952TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, strided_cm) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040953 GemmMicrokernelTester()
40954 .mr(4)
40955 .nr(2)
40956 .kr(1)
40957 .sr(1)
40958 .m(4)
40959 .n(2)
40960 .k(1)
40961 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080040962 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040963}
40964
40965
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040966TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040967 GemmMicrokernelTester()
40968 .mr(3)
40969 .nr(4)
40970 .kr(1)
40971 .sr(1)
40972 .m(3)
40973 .n(4)
40974 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040975 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040976}
40977
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040978TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, strided_cn) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040979 GemmMicrokernelTester()
40980 .mr(3)
40981 .nr(4)
40982 .kr(1)
40983 .sr(1)
40984 .m(3)
40985 .n(4)
40986 .k(1)
40987 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080040988 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070040989}
40990
Marat Dukhan2ac722e2022-01-04 01:54:20 -080040991TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080040992 for (uint32_t n = 1; n <= 4; n++) {
40993 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070040994 GemmMicrokernelTester()
40995 .mr(3)
40996 .nr(4)
40997 .kr(1)
40998 .sr(1)
40999 .m(m)
41000 .n(n)
41001 .k(1)
41002 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041003 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041004 }
41005 }
41006}
41007
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041008TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041009 for (uint32_t m = 1; m <= 3; m++) {
41010 GemmMicrokernelTester()
41011 .mr(3)
41012 .nr(4)
41013 .kr(1)
41014 .sr(1)
41015 .m(m)
41016 .n(4)
41017 .k(1)
41018 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041019 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041020 }
41021}
41022
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041023TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041024 for (uint32_t n = 1; n <= 4; n++) {
41025 GemmMicrokernelTester()
41026 .mr(3)
41027 .nr(4)
41028 .kr(1)
41029 .sr(1)
41030 .m(3)
41031 .n(n)
41032 .k(1)
41033 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041034 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041035 }
41036}
41037
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041038TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_gt_1) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041039 for (size_t k = 2; k < 10; k++) {
41040 GemmMicrokernelTester()
41041 .mr(3)
41042 .nr(4)
41043 .kr(1)
41044 .sr(1)
41045 .m(3)
41046 .n(4)
41047 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080041048 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041049 }
41050}
41051
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041052TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_gt_1_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041053 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080041054 for (uint32_t n = 1; n <= 4; n++) {
41055 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041056 GemmMicrokernelTester()
41057 .mr(3)
41058 .nr(4)
41059 .kr(1)
41060 .sr(1)
41061 .m(m)
41062 .n(n)
41063 .k(k)
41064 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041065 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041066 }
41067 }
41068 }
41069}
41070
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041071TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041072 for (uint32_t n = 5; n < 8; n++) {
41073 for (size_t k = 1; k <= 5; k += 2) {
41074 GemmMicrokernelTester()
41075 .mr(3)
41076 .nr(4)
41077 .kr(1)
41078 .sr(1)
41079 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080041080 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070041081 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080041082 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041083 }
41084 }
41085}
41086
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041087TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041088 for (uint32_t n = 5; n < 8; n++) {
41089 for (size_t k = 1; k <= 5; k += 2) {
41090 GemmMicrokernelTester()
41091 .mr(3)
41092 .nr(4)
41093 .kr(1)
41094 .sr(1)
41095 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080041096 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070041097 .k(k)
41098 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080041099 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041100 }
41101 }
41102}
41103
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041104TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041105 for (uint32_t n = 5; n < 8; n++) {
41106 for (size_t k = 1; k <= 5; k += 2) {
41107 for (uint32_t m = 1; m <= 3; m++) {
41108 GemmMicrokernelTester()
41109 .mr(3)
41110 .nr(4)
41111 .kr(1)
41112 .sr(1)
41113 .m(m)
41114 .n(n)
41115 .k(k)
41116 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041117 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041118 }
41119 }
41120 }
41121}
41122
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041123TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041124 for (uint32_t n = 8; n <= 12; n += 4) {
41125 for (size_t k = 1; k <= 5; k += 2) {
41126 GemmMicrokernelTester()
41127 .mr(3)
41128 .nr(4)
41129 .kr(1)
41130 .sr(1)
41131 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080041132 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070041133 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080041134 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041135 }
41136 }
41137}
41138
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041139TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041140 for (uint32_t n = 8; n <= 12; n += 4) {
41141 for (size_t k = 1; k <= 5; k += 2) {
41142 GemmMicrokernelTester()
41143 .mr(3)
41144 .nr(4)
41145 .kr(1)
41146 .sr(1)
41147 .m(3)
41148 .n(n)
41149 .k(k)
41150 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080041151 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041152 }
41153 }
41154}
41155
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041156TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041157 for (uint32_t n = 8; n <= 12; n += 4) {
41158 for (size_t k = 1; k <= 5; k += 2) {
41159 for (uint32_t m = 1; m <= 3; m++) {
41160 GemmMicrokernelTester()
41161 .mr(3)
41162 .nr(4)
41163 .kr(1)
41164 .sr(1)
41165 .m(m)
41166 .n(n)
41167 .k(k)
41168 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041169 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041170 }
41171 }
41172 }
41173}
41174
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041175TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, small_kernel) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041176 for (size_t k = 1; k <= 5; k += 2) {
41177 GemmMicrokernelTester()
41178 .mr(3)
41179 .nr(4)
41180 .kr(1)
41181 .sr(1)
41182 .m(3)
41183 .n(4)
41184 .k(k)
41185 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080041186 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041187 }
41188}
41189
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041190TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, small_kernel_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041191 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080041192 for (uint32_t n = 1; n <= 4; n++) {
41193 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041194 GemmMicrokernelTester()
41195 .mr(3)
41196 .nr(4)
41197 .kr(1)
41198 .sr(1)
41199 .m(m)
41200 .n(n)
41201 .k(k)
41202 .ks(3)
41203 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041204 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041205 }
41206 }
41207 }
41208}
41209
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041210TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4_small_kernel) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041211 for (uint32_t n = 5; n < 8; n++) {
41212 for (size_t k = 1; k <= 5; k += 2) {
41213 GemmMicrokernelTester()
41214 .mr(3)
41215 .nr(4)
41216 .kr(1)
41217 .sr(1)
41218 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080041219 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070041220 .k(k)
41221 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080041222 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041223 }
41224 }
41225}
41226
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041227TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4_small_kernel) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041228 for (uint32_t n = 8; n <= 12; n += 4) {
41229 for (size_t k = 1; k <= 5; k += 2) {
41230 GemmMicrokernelTester()
41231 .mr(3)
41232 .nr(4)
41233 .kr(1)
41234 .sr(1)
41235 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080041236 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070041237 .k(k)
41238 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080041239 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041240 }
41241 }
41242}
41243
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041244TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, strided_cm_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041245 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080041246 for (uint32_t n = 1; n <= 4; n++) {
41247 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041248 GemmMicrokernelTester()
41249 .mr(3)
41250 .nr(4)
41251 .kr(1)
41252 .sr(1)
41253 .m(m)
41254 .n(n)
41255 .k(k)
41256 .cm_stride(7)
41257 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041258 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041259 }
41260 }
41261 }
41262}
41263
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041264TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, a_offset) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041265 for (size_t k = 1; k <= 5; k += 2) {
41266 GemmMicrokernelTester()
41267 .mr(3)
41268 .nr(4)
41269 .kr(1)
41270 .sr(1)
41271 .m(3)
41272 .n(4)
41273 .k(k)
41274 .ks(3)
41275 .a_offset(17)
Marat Dukhan50323b82022-01-11 00:12:01 -080041276 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041277 }
41278}
41279
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041280TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080041281 for (size_t k = 1; k <= 5; k += 2) {
41282 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041283 GemmMicrokernelTester()
41284 .mr(3)
41285 .nr(4)
41286 .kr(1)
41287 .sr(1)
41288 .m(3)
41289 .n(4)
41290 .k(k)
41291 .ks(3)
41292 .a_offset(17)
41293 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080041294 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041295 }
41296 }
41297}
41298
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041299TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, qmin) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041300 GemmMicrokernelTester()
41301 .mr(3)
41302 .nr(4)
41303 .kr(1)
41304 .sr(1)
41305 .m(3)
41306 .n(4)
41307 .k(1)
41308 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080041309 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041310}
41311
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041312TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, qmax) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041313 GemmMicrokernelTester()
41314 .mr(3)
41315 .nr(4)
41316 .kr(1)
41317 .sr(1)
41318 .m(3)
41319 .n(4)
41320 .k(1)
41321 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080041322 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041323}
41324
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041325TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, strided_cm) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041326 GemmMicrokernelTester()
41327 .mr(3)
41328 .nr(4)
41329 .kr(1)
41330 .sr(1)
41331 .m(3)
41332 .n(4)
41333 .k(1)
41334 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080041335 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041336}
41337
41338
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041339TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041340 GemmMicrokernelTester()
41341 .mr(4)
41342 .nr(4)
41343 .kr(1)
41344 .sr(1)
41345 .m(4)
41346 .n(4)
41347 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041348 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041349}
41350
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041351TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, strided_cn) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041352 GemmMicrokernelTester()
41353 .mr(4)
41354 .nr(4)
41355 .kr(1)
41356 .sr(1)
41357 .m(4)
41358 .n(4)
41359 .k(1)
41360 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080041361 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041362}
41363
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041364TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080041365 for (uint32_t n = 1; n <= 4; n++) {
41366 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041367 GemmMicrokernelTester()
41368 .mr(4)
41369 .nr(4)
41370 .kr(1)
41371 .sr(1)
41372 .m(m)
41373 .n(n)
41374 .k(1)
41375 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041376 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041377 }
41378 }
41379}
41380
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041381TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041382 for (uint32_t m = 1; m <= 4; m++) {
41383 GemmMicrokernelTester()
41384 .mr(4)
41385 .nr(4)
41386 .kr(1)
41387 .sr(1)
41388 .m(m)
41389 .n(4)
41390 .k(1)
41391 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041392 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041393 }
41394}
41395
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041396TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041397 for (uint32_t n = 1; n <= 4; n++) {
41398 GemmMicrokernelTester()
41399 .mr(4)
41400 .nr(4)
41401 .kr(1)
41402 .sr(1)
41403 .m(4)
41404 .n(n)
41405 .k(1)
41406 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041407 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041408 }
41409}
41410
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041411TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_gt_1) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041412 for (size_t k = 2; k < 10; k++) {
41413 GemmMicrokernelTester()
41414 .mr(4)
41415 .nr(4)
41416 .kr(1)
41417 .sr(1)
41418 .m(4)
41419 .n(4)
41420 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080041421 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041422 }
41423}
41424
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041425TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_gt_1_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041426 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080041427 for (uint32_t n = 1; n <= 4; n++) {
41428 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041429 GemmMicrokernelTester()
41430 .mr(4)
41431 .nr(4)
41432 .kr(1)
41433 .sr(1)
41434 .m(m)
41435 .n(n)
41436 .k(k)
41437 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041438 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041439 }
41440 }
41441 }
41442}
41443
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041444TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041445 for (uint32_t n = 5; n < 8; n++) {
41446 for (size_t k = 1; k <= 5; k += 2) {
41447 GemmMicrokernelTester()
41448 .mr(4)
41449 .nr(4)
41450 .kr(1)
41451 .sr(1)
41452 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080041453 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070041454 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080041455 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041456 }
41457 }
41458}
41459
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041460TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041461 for (uint32_t n = 5; n < 8; n++) {
41462 for (size_t k = 1; k <= 5; k += 2) {
41463 GemmMicrokernelTester()
41464 .mr(4)
41465 .nr(4)
41466 .kr(1)
41467 .sr(1)
41468 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080041469 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070041470 .k(k)
41471 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080041472 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041473 }
41474 }
41475}
41476
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041477TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041478 for (uint32_t n = 5; n < 8; n++) {
41479 for (size_t k = 1; k <= 5; k += 2) {
41480 for (uint32_t m = 1; m <= 4; m++) {
41481 GemmMicrokernelTester()
41482 .mr(4)
41483 .nr(4)
41484 .kr(1)
41485 .sr(1)
41486 .m(m)
41487 .n(n)
41488 .k(k)
41489 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041490 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041491 }
41492 }
41493 }
41494}
41495
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041496TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041497 for (uint32_t n = 8; n <= 12; n += 4) {
41498 for (size_t k = 1; k <= 5; k += 2) {
41499 GemmMicrokernelTester()
41500 .mr(4)
41501 .nr(4)
41502 .kr(1)
41503 .sr(1)
41504 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080041505 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070041506 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080041507 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041508 }
41509 }
41510}
41511
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041512TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041513 for (uint32_t n = 8; n <= 12; n += 4) {
41514 for (size_t k = 1; k <= 5; k += 2) {
41515 GemmMicrokernelTester()
41516 .mr(4)
41517 .nr(4)
41518 .kr(1)
41519 .sr(1)
41520 .m(4)
41521 .n(n)
41522 .k(k)
41523 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080041524 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041525 }
41526 }
41527}
41528
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041529TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041530 for (uint32_t n = 8; n <= 12; n += 4) {
41531 for (size_t k = 1; k <= 5; k += 2) {
41532 for (uint32_t m = 1; m <= 4; m++) {
41533 GemmMicrokernelTester()
41534 .mr(4)
41535 .nr(4)
41536 .kr(1)
41537 .sr(1)
41538 .m(m)
41539 .n(n)
41540 .k(k)
41541 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041542 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041543 }
41544 }
41545 }
41546}
41547
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041548TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, small_kernel) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041549 for (size_t k = 1; k <= 5; k += 2) {
41550 GemmMicrokernelTester()
41551 .mr(4)
41552 .nr(4)
41553 .kr(1)
41554 .sr(1)
41555 .m(4)
41556 .n(4)
41557 .k(k)
41558 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080041559 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041560 }
41561}
41562
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041563TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, small_kernel_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041564 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080041565 for (uint32_t n = 1; n <= 4; n++) {
41566 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041567 GemmMicrokernelTester()
41568 .mr(4)
41569 .nr(4)
41570 .kr(1)
41571 .sr(1)
41572 .m(m)
41573 .n(n)
41574 .k(k)
41575 .ks(3)
41576 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041577 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041578 }
41579 }
41580 }
41581}
41582
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041583TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4_small_kernel) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041584 for (uint32_t n = 5; n < 8; n++) {
41585 for (size_t k = 1; k <= 5; k += 2) {
41586 GemmMicrokernelTester()
41587 .mr(4)
41588 .nr(4)
41589 .kr(1)
41590 .sr(1)
41591 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080041592 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070041593 .k(k)
41594 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080041595 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041596 }
41597 }
41598}
41599
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041600TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4_small_kernel) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041601 for (uint32_t n = 8; n <= 12; n += 4) {
41602 for (size_t k = 1; k <= 5; k += 2) {
41603 GemmMicrokernelTester()
41604 .mr(4)
41605 .nr(4)
41606 .kr(1)
41607 .sr(1)
41608 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080041609 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070041610 .k(k)
41611 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080041612 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041613 }
41614 }
41615}
41616
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041617TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, strided_cm_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041618 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080041619 for (uint32_t n = 1; n <= 4; n++) {
41620 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041621 GemmMicrokernelTester()
41622 .mr(4)
41623 .nr(4)
41624 .kr(1)
41625 .sr(1)
41626 .m(m)
41627 .n(n)
41628 .k(k)
41629 .cm_stride(7)
41630 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041631 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041632 }
41633 }
41634 }
41635}
41636
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041637TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, a_offset) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041638 for (size_t k = 1; k <= 5; k += 2) {
41639 GemmMicrokernelTester()
41640 .mr(4)
41641 .nr(4)
41642 .kr(1)
41643 .sr(1)
41644 .m(4)
41645 .n(4)
41646 .k(k)
41647 .ks(3)
41648 .a_offset(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080041649 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041650 }
41651}
41652
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041653TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080041654 for (size_t k = 1; k <= 5; k += 2) {
41655 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041656 GemmMicrokernelTester()
41657 .mr(4)
41658 .nr(4)
41659 .kr(1)
41660 .sr(1)
41661 .m(4)
41662 .n(4)
41663 .k(k)
41664 .ks(3)
41665 .a_offset(23)
41666 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080041667 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041668 }
41669 }
41670}
41671
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041672TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, qmin) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041673 GemmMicrokernelTester()
41674 .mr(4)
41675 .nr(4)
41676 .kr(1)
41677 .sr(1)
41678 .m(4)
41679 .n(4)
41680 .k(1)
41681 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080041682 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041683}
41684
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041685TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, qmax) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041686 GemmMicrokernelTester()
41687 .mr(4)
41688 .nr(4)
41689 .kr(1)
41690 .sr(1)
41691 .m(4)
41692 .n(4)
41693 .k(1)
41694 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080041695 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041696}
41697
Marat Dukhan2ac722e2022-01-04 01:54:20 -080041698TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, strided_cm) {
Marat Dukhan779b2532021-06-29 14:14:13 -070041699 GemmMicrokernelTester()
41700 .mr(4)
41701 .nr(4)
41702 .kr(1)
41703 .sr(1)
41704 .m(4)
41705 .n(4)
41706 .k(1)
41707 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080041708 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070041709}
Marat Dukhan272d4d92022-01-04 15:07:14 -080041710
41711
41712TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1) {
41713 GemmMicrokernelTester()
41714 .mr(1)
41715 .nr(2)
41716 .kr(1)
41717 .sr(1)
41718 .m(1)
41719 .n(2)
41720 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041721 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080041722}
41723
41724TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cn) {
41725 GemmMicrokernelTester()
41726 .mr(1)
41727 .nr(2)
41728 .kr(1)
41729 .sr(1)
41730 .m(1)
41731 .n(2)
41732 .k(1)
41733 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080041734 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080041735}
41736
41737TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080041738 for (uint32_t n = 1; n <= 2; n++) {
41739 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080041740 GemmMicrokernelTester()
41741 .mr(1)
41742 .nr(2)
41743 .kr(1)
41744 .sr(1)
41745 .m(m)
41746 .n(n)
41747 .k(1)
41748 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041749 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080041750 }
41751 }
41752}
41753
41754TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
41755 for (uint32_t m = 1; m <= 1; m++) {
41756 GemmMicrokernelTester()
41757 .mr(1)
41758 .nr(2)
41759 .kr(1)
41760 .sr(1)
41761 .m(m)
41762 .n(2)
41763 .k(1)
41764 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041765 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080041766 }
41767}
41768
41769TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
41770 for (uint32_t n = 1; n <= 2; n++) {
41771 GemmMicrokernelTester()
41772 .mr(1)
41773 .nr(2)
41774 .kr(1)
41775 .sr(1)
41776 .m(1)
41777 .n(n)
41778 .k(1)
41779 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041780 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080041781 }
41782}
41783
41784TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1) {
41785 for (size_t k = 2; k < 10; k++) {
41786 GemmMicrokernelTester()
41787 .mr(1)
41788 .nr(2)
41789 .kr(1)
41790 .sr(1)
41791 .m(1)
41792 .n(2)
41793 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080041794 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080041795 }
41796}
41797
41798TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1_subtile) {
41799 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080041800 for (uint32_t n = 1; n <= 2; n++) {
41801 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080041802 GemmMicrokernelTester()
41803 .mr(1)
41804 .nr(2)
41805 .kr(1)
41806 .sr(1)
41807 .m(m)
41808 .n(n)
41809 .k(k)
41810 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041811 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080041812 }
41813 }
41814 }
41815}
41816
41817TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2) {
41818 for (uint32_t n = 3; n < 4; n++) {
41819 for (size_t k = 1; k <= 5; k += 2) {
41820 GemmMicrokernelTester()
41821 .mr(1)
41822 .nr(2)
41823 .kr(1)
41824 .sr(1)
41825 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080041826 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080041827 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080041828 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080041829 }
41830 }
41831}
41832
41833TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
41834 for (uint32_t n = 3; n < 4; n++) {
41835 for (size_t k = 1; k <= 5; k += 2) {
41836 GemmMicrokernelTester()
41837 .mr(1)
41838 .nr(2)
41839 .kr(1)
41840 .sr(1)
41841 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080041842 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080041843 .k(k)
41844 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080041845 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080041846 }
41847 }
41848}
41849
41850TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_subtile) {
41851 for (uint32_t n = 3; n < 4; n++) {
41852 for (size_t k = 1; k <= 5; k += 2) {
41853 for (uint32_t m = 1; m <= 1; m++) {
41854 GemmMicrokernelTester()
41855 .mr(1)
41856 .nr(2)
41857 .kr(1)
41858 .sr(1)
41859 .m(m)
41860 .n(n)
41861 .k(k)
41862 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041863 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080041864 }
41865 }
41866 }
41867}
41868
41869TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2) {
41870 for (uint32_t n = 4; n <= 6; n += 2) {
41871 for (size_t k = 1; k <= 5; k += 2) {
41872 GemmMicrokernelTester()
41873 .mr(1)
41874 .nr(2)
41875 .kr(1)
41876 .sr(1)
41877 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080041878 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080041879 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080041880 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080041881 }
41882 }
41883}
41884
41885TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
41886 for (uint32_t n = 4; n <= 6; n += 2) {
41887 for (size_t k = 1; k <= 5; k += 2) {
41888 GemmMicrokernelTester()
41889 .mr(1)
41890 .nr(2)
41891 .kr(1)
41892 .sr(1)
41893 .m(1)
41894 .n(n)
41895 .k(k)
41896 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080041897 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080041898 }
41899 }
41900}
41901
41902TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_subtile) {
41903 for (uint32_t n = 4; n <= 6; n += 2) {
41904 for (size_t k = 1; k <= 5; k += 2) {
41905 for (uint32_t m = 1; m <= 1; m++) {
41906 GemmMicrokernelTester()
41907 .mr(1)
41908 .nr(2)
41909 .kr(1)
41910 .sr(1)
41911 .m(m)
41912 .n(n)
41913 .k(k)
41914 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041915 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080041916 }
41917 }
41918 }
41919}
41920
41921TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, small_kernel) {
41922 for (size_t k = 1; k <= 5; k += 2) {
41923 GemmMicrokernelTester()
41924 .mr(1)
41925 .nr(2)
41926 .kr(1)
41927 .sr(1)
41928 .m(1)
41929 .n(2)
41930 .k(k)
41931 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080041932 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080041933 }
41934}
41935
41936TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, small_kernel_subtile) {
41937 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080041938 for (uint32_t n = 1; n <= 2; n++) {
41939 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080041940 GemmMicrokernelTester()
41941 .mr(1)
41942 .nr(2)
41943 .kr(1)
41944 .sr(1)
41945 .m(m)
41946 .n(n)
41947 .k(k)
41948 .ks(3)
41949 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041950 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080041951 }
41952 }
41953 }
41954}
41955
41956TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
41957 for (uint32_t n = 3; n < 4; n++) {
41958 for (size_t k = 1; k <= 5; k += 2) {
41959 GemmMicrokernelTester()
41960 .mr(1)
41961 .nr(2)
41962 .kr(1)
41963 .sr(1)
41964 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080041965 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080041966 .k(k)
41967 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080041968 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080041969 }
41970 }
41971}
41972
41973TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
41974 for (uint32_t n = 4; n <= 6; n += 2) {
41975 for (size_t k = 1; k <= 5; k += 2) {
41976 GemmMicrokernelTester()
41977 .mr(1)
41978 .nr(2)
41979 .kr(1)
41980 .sr(1)
41981 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080041982 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080041983 .k(k)
41984 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080041985 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080041986 }
41987 }
41988}
41989
41990TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cm_subtile) {
41991 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080041992 for (uint32_t n = 1; n <= 2; n++) {
41993 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080041994 GemmMicrokernelTester()
41995 .mr(1)
41996 .nr(2)
41997 .kr(1)
41998 .sr(1)
41999 .m(m)
42000 .n(n)
42001 .k(k)
42002 .cm_stride(5)
42003 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042004 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042005 }
42006 }
42007 }
42008}
42009
42010TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, a_offset) {
42011 for (size_t k = 1; k <= 5; k += 2) {
42012 GemmMicrokernelTester()
42013 .mr(1)
42014 .nr(2)
42015 .kr(1)
42016 .sr(1)
42017 .m(1)
42018 .n(2)
42019 .k(k)
42020 .ks(3)
42021 .a_offset(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080042022 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042023 }
42024}
42025
42026TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080042027 for (size_t k = 1; k <= 5; k += 2) {
42028 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080042029 GemmMicrokernelTester()
42030 .mr(1)
42031 .nr(2)
42032 .kr(1)
42033 .sr(1)
42034 .m(1)
42035 .n(2)
42036 .k(k)
42037 .ks(3)
42038 .a_offset(7)
42039 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080042040 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042041 }
42042 }
42043}
42044
42045TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, qmin) {
42046 GemmMicrokernelTester()
42047 .mr(1)
42048 .nr(2)
42049 .kr(1)
42050 .sr(1)
42051 .m(1)
42052 .n(2)
42053 .k(1)
42054 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080042055 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042056}
42057
42058TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, qmax) {
42059 GemmMicrokernelTester()
42060 .mr(1)
42061 .nr(2)
42062 .kr(1)
42063 .sr(1)
42064 .m(1)
42065 .n(2)
42066 .k(1)
42067 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080042068 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042069}
42070
42071TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cm) {
42072 GemmMicrokernelTester()
42073 .mr(1)
42074 .nr(2)
42075 .kr(1)
42076 .sr(1)
42077 .m(1)
42078 .n(2)
42079 .k(1)
42080 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080042081 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042082}
42083
42084
42085TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1) {
42086 GemmMicrokernelTester()
42087 .mr(2)
42088 .nr(2)
42089 .kr(1)
42090 .sr(1)
42091 .m(2)
42092 .n(2)
42093 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042094 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042095}
42096
42097TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cn) {
42098 GemmMicrokernelTester()
42099 .mr(2)
42100 .nr(2)
42101 .kr(1)
42102 .sr(1)
42103 .m(2)
42104 .n(2)
42105 .k(1)
42106 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080042107 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042108}
42109
42110TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080042111 for (uint32_t n = 1; n <= 2; n++) {
42112 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080042113 GemmMicrokernelTester()
42114 .mr(2)
42115 .nr(2)
42116 .kr(1)
42117 .sr(1)
42118 .m(m)
42119 .n(n)
42120 .k(1)
42121 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042122 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042123 }
42124 }
42125}
42126
42127TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
42128 for (uint32_t m = 1; m <= 2; m++) {
42129 GemmMicrokernelTester()
42130 .mr(2)
42131 .nr(2)
42132 .kr(1)
42133 .sr(1)
42134 .m(m)
42135 .n(2)
42136 .k(1)
42137 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042138 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042139 }
42140}
42141
42142TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
42143 for (uint32_t n = 1; n <= 2; n++) {
42144 GemmMicrokernelTester()
42145 .mr(2)
42146 .nr(2)
42147 .kr(1)
42148 .sr(1)
42149 .m(2)
42150 .n(n)
42151 .k(1)
42152 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042153 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042154 }
42155}
42156
42157TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1) {
42158 for (size_t k = 2; k < 10; k++) {
42159 GemmMicrokernelTester()
42160 .mr(2)
42161 .nr(2)
42162 .kr(1)
42163 .sr(1)
42164 .m(2)
42165 .n(2)
42166 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080042167 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042168 }
42169}
42170
42171TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1_subtile) {
42172 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080042173 for (uint32_t n = 1; n <= 2; n++) {
42174 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080042175 GemmMicrokernelTester()
42176 .mr(2)
42177 .nr(2)
42178 .kr(1)
42179 .sr(1)
42180 .m(m)
42181 .n(n)
42182 .k(k)
42183 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042184 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042185 }
42186 }
42187 }
42188}
42189
42190TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2) {
42191 for (uint32_t n = 3; n < 4; n++) {
42192 for (size_t k = 1; k <= 5; k += 2) {
42193 GemmMicrokernelTester()
42194 .mr(2)
42195 .nr(2)
42196 .kr(1)
42197 .sr(1)
42198 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080042199 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080042200 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080042201 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042202 }
42203 }
42204}
42205
42206TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
42207 for (uint32_t n = 3; n < 4; n++) {
42208 for (size_t k = 1; k <= 5; k += 2) {
42209 GemmMicrokernelTester()
42210 .mr(2)
42211 .nr(2)
42212 .kr(1)
42213 .sr(1)
42214 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080042215 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080042216 .k(k)
42217 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080042218 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042219 }
42220 }
42221}
42222
42223TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_subtile) {
42224 for (uint32_t n = 3; n < 4; n++) {
42225 for (size_t k = 1; k <= 5; k += 2) {
42226 for (uint32_t m = 1; m <= 2; m++) {
42227 GemmMicrokernelTester()
42228 .mr(2)
42229 .nr(2)
42230 .kr(1)
42231 .sr(1)
42232 .m(m)
42233 .n(n)
42234 .k(k)
42235 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042236 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042237 }
42238 }
42239 }
42240}
42241
42242TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2) {
42243 for (uint32_t n = 4; n <= 6; n += 2) {
42244 for (size_t k = 1; k <= 5; k += 2) {
42245 GemmMicrokernelTester()
42246 .mr(2)
42247 .nr(2)
42248 .kr(1)
42249 .sr(1)
42250 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080042251 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080042252 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080042253 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042254 }
42255 }
42256}
42257
42258TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
42259 for (uint32_t n = 4; n <= 6; n += 2) {
42260 for (size_t k = 1; k <= 5; k += 2) {
42261 GemmMicrokernelTester()
42262 .mr(2)
42263 .nr(2)
42264 .kr(1)
42265 .sr(1)
42266 .m(2)
42267 .n(n)
42268 .k(k)
42269 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080042270 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042271 }
42272 }
42273}
42274
42275TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_subtile) {
42276 for (uint32_t n = 4; n <= 6; n += 2) {
42277 for (size_t k = 1; k <= 5; k += 2) {
42278 for (uint32_t m = 1; m <= 2; m++) {
42279 GemmMicrokernelTester()
42280 .mr(2)
42281 .nr(2)
42282 .kr(1)
42283 .sr(1)
42284 .m(m)
42285 .n(n)
42286 .k(k)
42287 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042288 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042289 }
42290 }
42291 }
42292}
42293
42294TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, small_kernel) {
42295 for (size_t k = 1; k <= 5; k += 2) {
42296 GemmMicrokernelTester()
42297 .mr(2)
42298 .nr(2)
42299 .kr(1)
42300 .sr(1)
42301 .m(2)
42302 .n(2)
42303 .k(k)
42304 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080042305 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042306 }
42307}
42308
42309TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, small_kernel_subtile) {
42310 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080042311 for (uint32_t n = 1; n <= 2; n++) {
42312 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080042313 GemmMicrokernelTester()
42314 .mr(2)
42315 .nr(2)
42316 .kr(1)
42317 .sr(1)
42318 .m(m)
42319 .n(n)
42320 .k(k)
42321 .ks(3)
42322 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042323 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042324 }
42325 }
42326 }
42327}
42328
42329TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
42330 for (uint32_t n = 3; n < 4; n++) {
42331 for (size_t k = 1; k <= 5; k += 2) {
42332 GemmMicrokernelTester()
42333 .mr(2)
42334 .nr(2)
42335 .kr(1)
42336 .sr(1)
42337 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080042338 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080042339 .k(k)
42340 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080042341 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042342 }
42343 }
42344}
42345
42346TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
42347 for (uint32_t n = 4; n <= 6; n += 2) {
42348 for (size_t k = 1; k <= 5; k += 2) {
42349 GemmMicrokernelTester()
42350 .mr(2)
42351 .nr(2)
42352 .kr(1)
42353 .sr(1)
42354 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080042355 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080042356 .k(k)
42357 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080042358 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042359 }
42360 }
42361}
42362
42363TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cm_subtile) {
42364 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080042365 for (uint32_t n = 1; n <= 2; n++) {
42366 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080042367 GemmMicrokernelTester()
42368 .mr(2)
42369 .nr(2)
42370 .kr(1)
42371 .sr(1)
42372 .m(m)
42373 .n(n)
42374 .k(k)
42375 .cm_stride(5)
42376 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042377 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042378 }
42379 }
42380 }
42381}
42382
42383TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, a_offset) {
42384 for (size_t k = 1; k <= 5; k += 2) {
42385 GemmMicrokernelTester()
42386 .mr(2)
42387 .nr(2)
42388 .kr(1)
42389 .sr(1)
42390 .m(2)
42391 .n(2)
42392 .k(k)
42393 .ks(3)
42394 .a_offset(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080042395 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042396 }
42397}
42398
42399TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080042400 for (size_t k = 1; k <= 5; k += 2) {
42401 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080042402 GemmMicrokernelTester()
42403 .mr(2)
42404 .nr(2)
42405 .kr(1)
42406 .sr(1)
42407 .m(2)
42408 .n(2)
42409 .k(k)
42410 .ks(3)
42411 .a_offset(13)
42412 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080042413 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042414 }
42415 }
42416}
42417
42418TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, qmin) {
42419 GemmMicrokernelTester()
42420 .mr(2)
42421 .nr(2)
42422 .kr(1)
42423 .sr(1)
42424 .m(2)
42425 .n(2)
42426 .k(1)
42427 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080042428 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042429}
42430
42431TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, qmax) {
42432 GemmMicrokernelTester()
42433 .mr(2)
42434 .nr(2)
42435 .kr(1)
42436 .sr(1)
42437 .m(2)
42438 .n(2)
42439 .k(1)
42440 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080042441 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042442}
42443
42444TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cm) {
42445 GemmMicrokernelTester()
42446 .mr(2)
42447 .nr(2)
42448 .kr(1)
42449 .sr(1)
42450 .m(2)
42451 .n(2)
42452 .k(1)
42453 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080042454 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042455}
42456
42457
Marat Dukhan272d4d92022-01-04 15:07:14 -080042458TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1) {
42459 GemmMicrokernelTester()
42460 .mr(1)
42461 .nr(4)
42462 .kr(1)
42463 .sr(1)
42464 .m(1)
42465 .n(4)
42466 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042467 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042468}
42469
42470TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cn) {
42471 GemmMicrokernelTester()
42472 .mr(1)
42473 .nr(4)
42474 .kr(1)
42475 .sr(1)
42476 .m(1)
42477 .n(4)
42478 .k(1)
42479 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080042480 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042481}
42482
42483TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080042484 for (uint32_t n = 1; n <= 4; n++) {
42485 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080042486 GemmMicrokernelTester()
42487 .mr(1)
42488 .nr(4)
42489 .kr(1)
42490 .sr(1)
42491 .m(m)
42492 .n(n)
42493 .k(1)
42494 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042495 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042496 }
42497 }
42498}
42499
42500TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
42501 for (uint32_t m = 1; m <= 1; m++) {
42502 GemmMicrokernelTester()
42503 .mr(1)
42504 .nr(4)
42505 .kr(1)
42506 .sr(1)
42507 .m(m)
42508 .n(4)
42509 .k(1)
42510 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042511 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042512 }
42513}
42514
42515TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
42516 for (uint32_t n = 1; n <= 4; n++) {
42517 GemmMicrokernelTester()
42518 .mr(1)
42519 .nr(4)
42520 .kr(1)
42521 .sr(1)
42522 .m(1)
42523 .n(n)
42524 .k(1)
42525 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042526 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042527 }
42528}
42529
42530TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1) {
42531 for (size_t k = 2; k < 10; k++) {
42532 GemmMicrokernelTester()
42533 .mr(1)
42534 .nr(4)
42535 .kr(1)
42536 .sr(1)
42537 .m(1)
42538 .n(4)
42539 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080042540 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042541 }
42542}
42543
42544TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1_subtile) {
42545 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080042546 for (uint32_t n = 1; n <= 4; n++) {
42547 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080042548 GemmMicrokernelTester()
42549 .mr(1)
42550 .nr(4)
42551 .kr(1)
42552 .sr(1)
42553 .m(m)
42554 .n(n)
42555 .k(k)
42556 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042557 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042558 }
42559 }
42560 }
42561}
42562
42563TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4) {
42564 for (uint32_t n = 5; n < 8; n++) {
42565 for (size_t k = 1; k <= 5; k += 2) {
42566 GemmMicrokernelTester()
42567 .mr(1)
42568 .nr(4)
42569 .kr(1)
42570 .sr(1)
42571 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080042572 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080042573 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080042574 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042575 }
42576 }
42577}
42578
42579TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
42580 for (uint32_t n = 5; n < 8; n++) {
42581 for (size_t k = 1; k <= 5; k += 2) {
42582 GemmMicrokernelTester()
42583 .mr(1)
42584 .nr(4)
42585 .kr(1)
42586 .sr(1)
42587 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080042588 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080042589 .k(k)
42590 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080042591 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042592 }
42593 }
42594}
42595
42596TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_subtile) {
42597 for (uint32_t n = 5; n < 8; n++) {
42598 for (size_t k = 1; k <= 5; k += 2) {
42599 for (uint32_t m = 1; m <= 1; m++) {
42600 GemmMicrokernelTester()
42601 .mr(1)
42602 .nr(4)
42603 .kr(1)
42604 .sr(1)
42605 .m(m)
42606 .n(n)
42607 .k(k)
42608 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042609 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042610 }
42611 }
42612 }
42613}
42614
42615TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4) {
42616 for (uint32_t n = 8; n <= 12; n += 4) {
42617 for (size_t k = 1; k <= 5; k += 2) {
42618 GemmMicrokernelTester()
42619 .mr(1)
42620 .nr(4)
42621 .kr(1)
42622 .sr(1)
42623 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080042624 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080042625 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080042626 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042627 }
42628 }
42629}
42630
42631TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
42632 for (uint32_t n = 8; n <= 12; n += 4) {
42633 for (size_t k = 1; k <= 5; k += 2) {
42634 GemmMicrokernelTester()
42635 .mr(1)
42636 .nr(4)
42637 .kr(1)
42638 .sr(1)
42639 .m(1)
42640 .n(n)
42641 .k(k)
42642 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080042643 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042644 }
42645 }
42646}
42647
42648TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_subtile) {
42649 for (uint32_t n = 8; n <= 12; n += 4) {
42650 for (size_t k = 1; k <= 5; k += 2) {
42651 for (uint32_t m = 1; m <= 1; m++) {
42652 GemmMicrokernelTester()
42653 .mr(1)
42654 .nr(4)
42655 .kr(1)
42656 .sr(1)
42657 .m(m)
42658 .n(n)
42659 .k(k)
42660 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042661 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042662 }
42663 }
42664 }
42665}
42666
42667TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, small_kernel) {
42668 for (size_t k = 1; k <= 5; k += 2) {
42669 GemmMicrokernelTester()
42670 .mr(1)
42671 .nr(4)
42672 .kr(1)
42673 .sr(1)
42674 .m(1)
42675 .n(4)
42676 .k(k)
42677 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080042678 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042679 }
42680}
42681
42682TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, small_kernel_subtile) {
42683 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080042684 for (uint32_t n = 1; n <= 4; n++) {
42685 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080042686 GemmMicrokernelTester()
42687 .mr(1)
42688 .nr(4)
42689 .kr(1)
42690 .sr(1)
42691 .m(m)
42692 .n(n)
42693 .k(k)
42694 .ks(3)
42695 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042696 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042697 }
42698 }
42699 }
42700}
42701
42702TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
42703 for (uint32_t n = 5; n < 8; n++) {
42704 for (size_t k = 1; k <= 5; k += 2) {
42705 GemmMicrokernelTester()
42706 .mr(1)
42707 .nr(4)
42708 .kr(1)
42709 .sr(1)
42710 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080042711 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080042712 .k(k)
42713 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080042714 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042715 }
42716 }
42717}
42718
42719TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
42720 for (uint32_t n = 8; n <= 12; n += 4) {
42721 for (size_t k = 1; k <= 5; k += 2) {
42722 GemmMicrokernelTester()
42723 .mr(1)
42724 .nr(4)
42725 .kr(1)
42726 .sr(1)
42727 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080042728 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080042729 .k(k)
42730 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080042731 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042732 }
42733 }
42734}
42735
42736TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm_subtile) {
42737 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080042738 for (uint32_t n = 1; n <= 4; n++) {
42739 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080042740 GemmMicrokernelTester()
42741 .mr(1)
42742 .nr(4)
42743 .kr(1)
42744 .sr(1)
42745 .m(m)
42746 .n(n)
42747 .k(k)
42748 .cm_stride(7)
42749 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042750 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042751 }
42752 }
42753 }
42754}
42755
42756TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, a_offset) {
42757 for (size_t k = 1; k <= 5; k += 2) {
42758 GemmMicrokernelTester()
42759 .mr(1)
42760 .nr(4)
42761 .kr(1)
42762 .sr(1)
42763 .m(1)
42764 .n(4)
42765 .k(k)
42766 .ks(3)
42767 .a_offset(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080042768 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042769 }
42770}
42771
42772TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080042773 for (size_t k = 1; k <= 5; k += 2) {
42774 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080042775 GemmMicrokernelTester()
42776 .mr(1)
42777 .nr(4)
42778 .kr(1)
42779 .sr(1)
42780 .m(1)
42781 .n(4)
42782 .k(k)
42783 .ks(3)
42784 .a_offset(7)
42785 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080042786 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042787 }
42788 }
42789}
42790
42791TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmin) {
42792 GemmMicrokernelTester()
42793 .mr(1)
42794 .nr(4)
42795 .kr(1)
42796 .sr(1)
42797 .m(1)
42798 .n(4)
42799 .k(1)
42800 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080042801 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042802}
42803
42804TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmax) {
42805 GemmMicrokernelTester()
42806 .mr(1)
42807 .nr(4)
42808 .kr(1)
42809 .sr(1)
42810 .m(1)
42811 .n(4)
42812 .k(1)
42813 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080042814 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042815}
42816
42817TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm) {
42818 GemmMicrokernelTester()
42819 .mr(1)
42820 .nr(4)
42821 .kr(1)
42822 .sr(1)
42823 .m(1)
42824 .n(4)
42825 .k(1)
42826 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080042827 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042828}
42829
42830
42831TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1) {
42832 GemmMicrokernelTester()
42833 .mr(2)
42834 .nr(4)
42835 .kr(1)
42836 .sr(1)
42837 .m(2)
42838 .n(4)
42839 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042840 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042841}
42842
42843TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cn) {
42844 GemmMicrokernelTester()
42845 .mr(2)
42846 .nr(4)
42847 .kr(1)
42848 .sr(1)
42849 .m(2)
42850 .n(4)
42851 .k(1)
42852 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080042853 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042854}
42855
42856TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080042857 for (uint32_t n = 1; n <= 4; n++) {
42858 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080042859 GemmMicrokernelTester()
42860 .mr(2)
42861 .nr(4)
42862 .kr(1)
42863 .sr(1)
42864 .m(m)
42865 .n(n)
42866 .k(1)
42867 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042868 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042869 }
42870 }
42871}
42872
42873TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
42874 for (uint32_t m = 1; m <= 2; m++) {
42875 GemmMicrokernelTester()
42876 .mr(2)
42877 .nr(4)
42878 .kr(1)
42879 .sr(1)
42880 .m(m)
42881 .n(4)
42882 .k(1)
42883 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042884 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042885 }
42886}
42887
42888TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
42889 for (uint32_t n = 1; n <= 4; n++) {
42890 GemmMicrokernelTester()
42891 .mr(2)
42892 .nr(4)
42893 .kr(1)
42894 .sr(1)
42895 .m(2)
42896 .n(n)
42897 .k(1)
42898 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042899 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042900 }
42901}
42902
42903TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1) {
42904 for (size_t k = 2; k < 10; k++) {
42905 GemmMicrokernelTester()
42906 .mr(2)
42907 .nr(4)
42908 .kr(1)
42909 .sr(1)
42910 .m(2)
42911 .n(4)
42912 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080042913 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042914 }
42915}
42916
42917TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1_subtile) {
42918 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080042919 for (uint32_t n = 1; n <= 4; n++) {
42920 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080042921 GemmMicrokernelTester()
42922 .mr(2)
42923 .nr(4)
42924 .kr(1)
42925 .sr(1)
42926 .m(m)
42927 .n(n)
42928 .k(k)
42929 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042930 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042931 }
42932 }
42933 }
42934}
42935
42936TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4) {
42937 for (uint32_t n = 5; n < 8; n++) {
42938 for (size_t k = 1; k <= 5; k += 2) {
42939 GemmMicrokernelTester()
42940 .mr(2)
42941 .nr(4)
42942 .kr(1)
42943 .sr(1)
42944 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080042945 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080042946 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080042947 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042948 }
42949 }
42950}
42951
42952TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
42953 for (uint32_t n = 5; n < 8; n++) {
42954 for (size_t k = 1; k <= 5; k += 2) {
42955 GemmMicrokernelTester()
42956 .mr(2)
42957 .nr(4)
42958 .kr(1)
42959 .sr(1)
42960 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080042961 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080042962 .k(k)
42963 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080042964 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042965 }
42966 }
42967}
42968
42969TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_subtile) {
42970 for (uint32_t n = 5; n < 8; n++) {
42971 for (size_t k = 1; k <= 5; k += 2) {
42972 for (uint32_t m = 1; m <= 2; m++) {
42973 GemmMicrokernelTester()
42974 .mr(2)
42975 .nr(4)
42976 .kr(1)
42977 .sr(1)
42978 .m(m)
42979 .n(n)
42980 .k(k)
42981 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042982 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080042983 }
42984 }
42985 }
42986}
42987
42988TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4) {
42989 for (uint32_t n = 8; n <= 12; n += 4) {
42990 for (size_t k = 1; k <= 5; k += 2) {
42991 GemmMicrokernelTester()
42992 .mr(2)
42993 .nr(4)
42994 .kr(1)
42995 .sr(1)
42996 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080042997 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080042998 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080042999 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043000 }
43001 }
43002}
43003
43004TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
43005 for (uint32_t n = 8; n <= 12; n += 4) {
43006 for (size_t k = 1; k <= 5; k += 2) {
43007 GemmMicrokernelTester()
43008 .mr(2)
43009 .nr(4)
43010 .kr(1)
43011 .sr(1)
43012 .m(2)
43013 .n(n)
43014 .k(k)
43015 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080043016 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043017 }
43018 }
43019}
43020
43021TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_subtile) {
43022 for (uint32_t n = 8; n <= 12; n += 4) {
43023 for (size_t k = 1; k <= 5; k += 2) {
43024 for (uint32_t m = 1; m <= 2; m++) {
43025 GemmMicrokernelTester()
43026 .mr(2)
43027 .nr(4)
43028 .kr(1)
43029 .sr(1)
43030 .m(m)
43031 .n(n)
43032 .k(k)
43033 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043034 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043035 }
43036 }
43037 }
43038}
43039
43040TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, small_kernel) {
43041 for (size_t k = 1; k <= 5; k += 2) {
43042 GemmMicrokernelTester()
43043 .mr(2)
43044 .nr(4)
43045 .kr(1)
43046 .sr(1)
43047 .m(2)
43048 .n(4)
43049 .k(k)
43050 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080043051 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043052 }
43053}
43054
43055TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, small_kernel_subtile) {
43056 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080043057 for (uint32_t n = 1; n <= 4; n++) {
43058 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080043059 GemmMicrokernelTester()
43060 .mr(2)
43061 .nr(4)
43062 .kr(1)
43063 .sr(1)
43064 .m(m)
43065 .n(n)
43066 .k(k)
43067 .ks(3)
43068 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043069 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043070 }
43071 }
43072 }
43073}
43074
43075TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
43076 for (uint32_t n = 5; n < 8; n++) {
43077 for (size_t k = 1; k <= 5; k += 2) {
43078 GemmMicrokernelTester()
43079 .mr(2)
43080 .nr(4)
43081 .kr(1)
43082 .sr(1)
43083 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080043084 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080043085 .k(k)
43086 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080043087 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043088 }
43089 }
43090}
43091
43092TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
43093 for (uint32_t n = 8; n <= 12; n += 4) {
43094 for (size_t k = 1; k <= 5; k += 2) {
43095 GemmMicrokernelTester()
43096 .mr(2)
43097 .nr(4)
43098 .kr(1)
43099 .sr(1)
43100 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080043101 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080043102 .k(k)
43103 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080043104 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043105 }
43106 }
43107}
43108
43109TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm_subtile) {
43110 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080043111 for (uint32_t n = 1; n <= 4; n++) {
43112 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080043113 GemmMicrokernelTester()
43114 .mr(2)
43115 .nr(4)
43116 .kr(1)
43117 .sr(1)
43118 .m(m)
43119 .n(n)
43120 .k(k)
43121 .cm_stride(7)
43122 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043123 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043124 }
43125 }
43126 }
43127}
43128
43129TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, a_offset) {
43130 for (size_t k = 1; k <= 5; k += 2) {
43131 GemmMicrokernelTester()
43132 .mr(2)
43133 .nr(4)
43134 .kr(1)
43135 .sr(1)
43136 .m(2)
43137 .n(4)
43138 .k(k)
43139 .ks(3)
43140 .a_offset(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080043141 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043142 }
43143}
43144
43145TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080043146 for (size_t k = 1; k <= 5; k += 2) {
43147 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080043148 GemmMicrokernelTester()
43149 .mr(2)
43150 .nr(4)
43151 .kr(1)
43152 .sr(1)
43153 .m(2)
43154 .n(4)
43155 .k(k)
43156 .ks(3)
43157 .a_offset(13)
43158 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080043159 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043160 }
43161 }
43162}
43163
43164TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmin) {
43165 GemmMicrokernelTester()
43166 .mr(2)
43167 .nr(4)
43168 .kr(1)
43169 .sr(1)
43170 .m(2)
43171 .n(4)
43172 .k(1)
43173 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080043174 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043175}
43176
43177TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmax) {
43178 GemmMicrokernelTester()
43179 .mr(2)
43180 .nr(4)
43181 .kr(1)
43182 .sr(1)
43183 .m(2)
43184 .n(4)
43185 .k(1)
43186 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080043187 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043188}
43189
43190TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm) {
43191 GemmMicrokernelTester()
43192 .mr(2)
43193 .nr(4)
43194 .kr(1)
43195 .sr(1)
43196 .m(2)
43197 .n(4)
43198 .k(1)
43199 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080043200 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043201}
43202
43203
Marat Dukhan272d4d92022-01-04 15:07:14 -080043204TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1) {
43205 GemmMicrokernelTester()
43206 .mr(3)
43207 .nr(2)
43208 .kr(1)
43209 .sr(1)
43210 .m(3)
43211 .n(2)
43212 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043213 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043214}
43215
43216TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cn) {
43217 GemmMicrokernelTester()
43218 .mr(3)
43219 .nr(2)
43220 .kr(1)
43221 .sr(1)
43222 .m(3)
43223 .n(2)
43224 .k(1)
43225 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080043226 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043227}
43228
43229TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080043230 for (uint32_t n = 1; n <= 2; n++) {
43231 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080043232 GemmMicrokernelTester()
43233 .mr(3)
43234 .nr(2)
43235 .kr(1)
43236 .sr(1)
43237 .m(m)
43238 .n(n)
43239 .k(1)
43240 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043241 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043242 }
43243 }
43244}
43245
43246TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
43247 for (uint32_t m = 1; m <= 3; m++) {
43248 GemmMicrokernelTester()
43249 .mr(3)
43250 .nr(2)
43251 .kr(1)
43252 .sr(1)
43253 .m(m)
43254 .n(2)
43255 .k(1)
43256 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043257 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043258 }
43259}
43260
43261TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
43262 for (uint32_t n = 1; n <= 2; n++) {
43263 GemmMicrokernelTester()
43264 .mr(3)
43265 .nr(2)
43266 .kr(1)
43267 .sr(1)
43268 .m(3)
43269 .n(n)
43270 .k(1)
43271 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043272 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043273 }
43274}
43275
43276TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1) {
43277 for (size_t k = 2; k < 10; k++) {
43278 GemmMicrokernelTester()
43279 .mr(3)
43280 .nr(2)
43281 .kr(1)
43282 .sr(1)
43283 .m(3)
43284 .n(2)
43285 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080043286 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043287 }
43288}
43289
43290TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1_subtile) {
43291 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080043292 for (uint32_t n = 1; n <= 2; n++) {
43293 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080043294 GemmMicrokernelTester()
43295 .mr(3)
43296 .nr(2)
43297 .kr(1)
43298 .sr(1)
43299 .m(m)
43300 .n(n)
43301 .k(k)
43302 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043303 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043304 }
43305 }
43306 }
43307}
43308
43309TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2) {
43310 for (uint32_t n = 3; n < 4; n++) {
43311 for (size_t k = 1; k <= 5; k += 2) {
43312 GemmMicrokernelTester()
43313 .mr(3)
43314 .nr(2)
43315 .kr(1)
43316 .sr(1)
43317 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080043318 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080043319 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080043320 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043321 }
43322 }
43323}
43324
43325TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
43326 for (uint32_t n = 3; n < 4; n++) {
43327 for (size_t k = 1; k <= 5; k += 2) {
43328 GemmMicrokernelTester()
43329 .mr(3)
43330 .nr(2)
43331 .kr(1)
43332 .sr(1)
43333 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080043334 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080043335 .k(k)
43336 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080043337 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043338 }
43339 }
43340}
43341
43342TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_subtile) {
43343 for (uint32_t n = 3; n < 4; n++) {
43344 for (size_t k = 1; k <= 5; k += 2) {
43345 for (uint32_t m = 1; m <= 3; m++) {
43346 GemmMicrokernelTester()
43347 .mr(3)
43348 .nr(2)
43349 .kr(1)
43350 .sr(1)
43351 .m(m)
43352 .n(n)
43353 .k(k)
43354 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043355 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043356 }
43357 }
43358 }
43359}
43360
43361TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2) {
43362 for (uint32_t n = 4; n <= 6; n += 2) {
43363 for (size_t k = 1; k <= 5; k += 2) {
43364 GemmMicrokernelTester()
43365 .mr(3)
43366 .nr(2)
43367 .kr(1)
43368 .sr(1)
43369 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080043370 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080043371 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080043372 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043373 }
43374 }
43375}
43376
43377TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_strided_cn) {
43378 for (uint32_t n = 4; n <= 6; n += 2) {
43379 for (size_t k = 1; k <= 5; k += 2) {
43380 GemmMicrokernelTester()
43381 .mr(3)
43382 .nr(2)
43383 .kr(1)
43384 .sr(1)
43385 .m(3)
43386 .n(n)
43387 .k(k)
43388 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080043389 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043390 }
43391 }
43392}
43393
43394TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_subtile) {
43395 for (uint32_t n = 4; n <= 6; n += 2) {
43396 for (size_t k = 1; k <= 5; k += 2) {
43397 for (uint32_t m = 1; m <= 3; m++) {
43398 GemmMicrokernelTester()
43399 .mr(3)
43400 .nr(2)
43401 .kr(1)
43402 .sr(1)
43403 .m(m)
43404 .n(n)
43405 .k(k)
43406 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043407 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043408 }
43409 }
43410 }
43411}
43412
43413TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, small_kernel) {
43414 for (size_t k = 1; k <= 5; k += 2) {
43415 GemmMicrokernelTester()
43416 .mr(3)
43417 .nr(2)
43418 .kr(1)
43419 .sr(1)
43420 .m(3)
43421 .n(2)
43422 .k(k)
43423 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080043424 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043425 }
43426}
43427
43428TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, small_kernel_subtile) {
43429 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080043430 for (uint32_t n = 1; n <= 2; n++) {
43431 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080043432 GemmMicrokernelTester()
43433 .mr(3)
43434 .nr(2)
43435 .kr(1)
43436 .sr(1)
43437 .m(m)
43438 .n(n)
43439 .k(k)
43440 .ks(3)
43441 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043442 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043443 }
43444 }
43445 }
43446}
43447
43448TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
43449 for (uint32_t n = 3; n < 4; n++) {
43450 for (size_t k = 1; k <= 5; k += 2) {
43451 GemmMicrokernelTester()
43452 .mr(3)
43453 .nr(2)
43454 .kr(1)
43455 .sr(1)
43456 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080043457 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080043458 .k(k)
43459 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080043460 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043461 }
43462 }
43463}
43464
43465TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_small_kernel) {
43466 for (uint32_t n = 4; n <= 6; n += 2) {
43467 for (size_t k = 1; k <= 5; k += 2) {
43468 GemmMicrokernelTester()
43469 .mr(3)
43470 .nr(2)
43471 .kr(1)
43472 .sr(1)
43473 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080043474 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080043475 .k(k)
43476 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080043477 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043478 }
43479 }
43480}
43481
43482TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm_subtile) {
43483 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080043484 for (uint32_t n = 1; n <= 2; n++) {
43485 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080043486 GemmMicrokernelTester()
43487 .mr(3)
43488 .nr(2)
43489 .kr(1)
43490 .sr(1)
43491 .m(m)
43492 .n(n)
43493 .k(k)
43494 .cm_stride(5)
43495 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043496 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043497 }
43498 }
43499 }
43500}
43501
43502TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, a_offset) {
43503 for (size_t k = 1; k <= 5; k += 2) {
43504 GemmMicrokernelTester()
43505 .mr(3)
43506 .nr(2)
43507 .kr(1)
43508 .sr(1)
43509 .m(3)
43510 .n(2)
43511 .k(k)
43512 .ks(3)
43513 .a_offset(17)
Marat Dukhan50323b82022-01-11 00:12:01 -080043514 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043515 }
43516}
43517
43518TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080043519 for (size_t k = 1; k <= 5; k += 2) {
43520 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080043521 GemmMicrokernelTester()
43522 .mr(3)
43523 .nr(2)
43524 .kr(1)
43525 .sr(1)
43526 .m(3)
43527 .n(2)
43528 .k(k)
43529 .ks(3)
43530 .a_offset(17)
43531 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080043532 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043533 }
43534 }
43535}
43536
43537TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmin) {
43538 GemmMicrokernelTester()
43539 .mr(3)
43540 .nr(2)
43541 .kr(1)
43542 .sr(1)
43543 .m(3)
43544 .n(2)
43545 .k(1)
43546 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080043547 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043548}
43549
43550TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmax) {
43551 GemmMicrokernelTester()
43552 .mr(3)
43553 .nr(2)
43554 .kr(1)
43555 .sr(1)
43556 .m(3)
43557 .n(2)
43558 .k(1)
43559 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080043560 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043561}
43562
43563TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm) {
43564 GemmMicrokernelTester()
43565 .mr(3)
43566 .nr(2)
43567 .kr(1)
43568 .sr(1)
43569 .m(3)
43570 .n(2)
43571 .k(1)
43572 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080043573 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043574}
43575
43576
43577TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1) {
43578 GemmMicrokernelTester()
43579 .mr(4)
43580 .nr(2)
43581 .kr(1)
43582 .sr(1)
43583 .m(4)
43584 .n(2)
43585 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043586 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043587}
43588
43589TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cn) {
43590 GemmMicrokernelTester()
43591 .mr(4)
43592 .nr(2)
43593 .kr(1)
43594 .sr(1)
43595 .m(4)
43596 .n(2)
43597 .k(1)
43598 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080043599 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043600}
43601
43602TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080043603 for (uint32_t n = 1; n <= 2; n++) {
43604 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080043605 GemmMicrokernelTester()
43606 .mr(4)
43607 .nr(2)
43608 .kr(1)
43609 .sr(1)
43610 .m(m)
43611 .n(n)
43612 .k(1)
43613 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043614 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043615 }
43616 }
43617}
43618
43619TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
43620 for (uint32_t m = 1; m <= 4; m++) {
43621 GemmMicrokernelTester()
43622 .mr(4)
43623 .nr(2)
43624 .kr(1)
43625 .sr(1)
43626 .m(m)
43627 .n(2)
43628 .k(1)
43629 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043630 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043631 }
43632}
43633
43634TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
43635 for (uint32_t n = 1; n <= 2; n++) {
43636 GemmMicrokernelTester()
43637 .mr(4)
43638 .nr(2)
43639 .kr(1)
43640 .sr(1)
43641 .m(4)
43642 .n(n)
43643 .k(1)
43644 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043645 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043646 }
43647}
43648
43649TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1) {
43650 for (size_t k = 2; k < 10; k++) {
43651 GemmMicrokernelTester()
43652 .mr(4)
43653 .nr(2)
43654 .kr(1)
43655 .sr(1)
43656 .m(4)
43657 .n(2)
43658 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080043659 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043660 }
43661}
43662
43663TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1_subtile) {
43664 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080043665 for (uint32_t n = 1; n <= 2; n++) {
43666 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080043667 GemmMicrokernelTester()
43668 .mr(4)
43669 .nr(2)
43670 .kr(1)
43671 .sr(1)
43672 .m(m)
43673 .n(n)
43674 .k(k)
43675 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043676 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043677 }
43678 }
43679 }
43680}
43681
43682TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2) {
43683 for (uint32_t n = 3; n < 4; n++) {
43684 for (size_t k = 1; k <= 5; k += 2) {
43685 GemmMicrokernelTester()
43686 .mr(4)
43687 .nr(2)
43688 .kr(1)
43689 .sr(1)
43690 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080043691 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080043692 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080043693 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043694 }
43695 }
43696}
43697
43698TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
43699 for (uint32_t n = 3; n < 4; n++) {
43700 for (size_t k = 1; k <= 5; k += 2) {
43701 GemmMicrokernelTester()
43702 .mr(4)
43703 .nr(2)
43704 .kr(1)
43705 .sr(1)
43706 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080043707 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080043708 .k(k)
43709 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080043710 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043711 }
43712 }
43713}
43714
43715TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_subtile) {
43716 for (uint32_t n = 3; n < 4; n++) {
43717 for (size_t k = 1; k <= 5; k += 2) {
43718 for (uint32_t m = 1; m <= 4; m++) {
43719 GemmMicrokernelTester()
43720 .mr(4)
43721 .nr(2)
43722 .kr(1)
43723 .sr(1)
43724 .m(m)
43725 .n(n)
43726 .k(k)
43727 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043728 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043729 }
43730 }
43731 }
43732}
43733
43734TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2) {
43735 for (uint32_t n = 4; n <= 6; n += 2) {
43736 for (size_t k = 1; k <= 5; k += 2) {
43737 GemmMicrokernelTester()
43738 .mr(4)
43739 .nr(2)
43740 .kr(1)
43741 .sr(1)
43742 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080043743 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080043744 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080043745 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043746 }
43747 }
43748}
43749
43750TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_strided_cn) {
43751 for (uint32_t n = 4; n <= 6; n += 2) {
43752 for (size_t k = 1; k <= 5; k += 2) {
43753 GemmMicrokernelTester()
43754 .mr(4)
43755 .nr(2)
43756 .kr(1)
43757 .sr(1)
43758 .m(4)
43759 .n(n)
43760 .k(k)
43761 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080043762 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043763 }
43764 }
43765}
43766
43767TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_subtile) {
43768 for (uint32_t n = 4; n <= 6; n += 2) {
43769 for (size_t k = 1; k <= 5; k += 2) {
43770 for (uint32_t m = 1; m <= 4; m++) {
43771 GemmMicrokernelTester()
43772 .mr(4)
43773 .nr(2)
43774 .kr(1)
43775 .sr(1)
43776 .m(m)
43777 .n(n)
43778 .k(k)
43779 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043780 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043781 }
43782 }
43783 }
43784}
43785
43786TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, small_kernel) {
43787 for (size_t k = 1; k <= 5; k += 2) {
43788 GemmMicrokernelTester()
43789 .mr(4)
43790 .nr(2)
43791 .kr(1)
43792 .sr(1)
43793 .m(4)
43794 .n(2)
43795 .k(k)
43796 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080043797 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043798 }
43799}
43800
43801TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, small_kernel_subtile) {
43802 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080043803 for (uint32_t n = 1; n <= 2; n++) {
43804 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080043805 GemmMicrokernelTester()
43806 .mr(4)
43807 .nr(2)
43808 .kr(1)
43809 .sr(1)
43810 .m(m)
43811 .n(n)
43812 .k(k)
43813 .ks(3)
43814 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043815 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043816 }
43817 }
43818 }
43819}
43820
43821TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
43822 for (uint32_t n = 3; n < 4; n++) {
43823 for (size_t k = 1; k <= 5; k += 2) {
43824 GemmMicrokernelTester()
43825 .mr(4)
43826 .nr(2)
43827 .kr(1)
43828 .sr(1)
43829 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080043830 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080043831 .k(k)
43832 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080043833 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043834 }
43835 }
43836}
43837
43838TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_small_kernel) {
43839 for (uint32_t n = 4; n <= 6; n += 2) {
43840 for (size_t k = 1; k <= 5; k += 2) {
43841 GemmMicrokernelTester()
43842 .mr(4)
43843 .nr(2)
43844 .kr(1)
43845 .sr(1)
43846 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080043847 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080043848 .k(k)
43849 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080043850 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043851 }
43852 }
43853}
43854
43855TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm_subtile) {
43856 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080043857 for (uint32_t n = 1; n <= 2; n++) {
43858 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080043859 GemmMicrokernelTester()
43860 .mr(4)
43861 .nr(2)
43862 .kr(1)
43863 .sr(1)
43864 .m(m)
43865 .n(n)
43866 .k(k)
43867 .cm_stride(5)
43868 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043869 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043870 }
43871 }
43872 }
43873}
43874
43875TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, a_offset) {
43876 for (size_t k = 1; k <= 5; k += 2) {
43877 GemmMicrokernelTester()
43878 .mr(4)
43879 .nr(2)
43880 .kr(1)
43881 .sr(1)
43882 .m(4)
43883 .n(2)
43884 .k(k)
43885 .ks(3)
43886 .a_offset(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080043887 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043888 }
43889}
43890
43891TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080043892 for (size_t k = 1; k <= 5; k += 2) {
43893 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080043894 GemmMicrokernelTester()
43895 .mr(4)
43896 .nr(2)
43897 .kr(1)
43898 .sr(1)
43899 .m(4)
43900 .n(2)
43901 .k(k)
43902 .ks(3)
43903 .a_offset(23)
43904 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080043905 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043906 }
43907 }
43908}
43909
43910TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmin) {
43911 GemmMicrokernelTester()
43912 .mr(4)
43913 .nr(2)
43914 .kr(1)
43915 .sr(1)
43916 .m(4)
43917 .n(2)
43918 .k(1)
43919 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080043920 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043921}
43922
43923TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmax) {
43924 GemmMicrokernelTester()
43925 .mr(4)
43926 .nr(2)
43927 .kr(1)
43928 .sr(1)
43929 .m(4)
43930 .n(2)
43931 .k(1)
43932 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080043933 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043934}
43935
43936TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm) {
43937 GemmMicrokernelTester()
43938 .mr(4)
43939 .nr(2)
43940 .kr(1)
43941 .sr(1)
43942 .m(4)
43943 .n(2)
43944 .k(1)
43945 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080043946 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043947}
43948
43949
Marat Dukhan272d4d92022-01-04 15:07:14 -080043950TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1) {
43951 GemmMicrokernelTester()
43952 .mr(3)
43953 .nr(4)
43954 .kr(1)
43955 .sr(1)
43956 .m(3)
43957 .n(4)
43958 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043959 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043960}
43961
43962TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cn) {
43963 GemmMicrokernelTester()
43964 .mr(3)
43965 .nr(4)
43966 .kr(1)
43967 .sr(1)
43968 .m(3)
43969 .n(4)
43970 .k(1)
43971 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080043972 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043973}
43974
43975TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080043976 for (uint32_t n = 1; n <= 4; n++) {
43977 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080043978 GemmMicrokernelTester()
43979 .mr(3)
43980 .nr(4)
43981 .kr(1)
43982 .sr(1)
43983 .m(m)
43984 .n(n)
43985 .k(1)
43986 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043987 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080043988 }
43989 }
43990}
43991
43992TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
43993 for (uint32_t m = 1; m <= 3; m++) {
43994 GemmMicrokernelTester()
43995 .mr(3)
43996 .nr(4)
43997 .kr(1)
43998 .sr(1)
43999 .m(m)
44000 .n(4)
44001 .k(1)
44002 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044003 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044004 }
44005}
44006
44007TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
44008 for (uint32_t n = 1; n <= 4; n++) {
44009 GemmMicrokernelTester()
44010 .mr(3)
44011 .nr(4)
44012 .kr(1)
44013 .sr(1)
44014 .m(3)
44015 .n(n)
44016 .k(1)
44017 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044018 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044019 }
44020}
44021
44022TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1) {
44023 for (size_t k = 2; k < 10; k++) {
44024 GemmMicrokernelTester()
44025 .mr(3)
44026 .nr(4)
44027 .kr(1)
44028 .sr(1)
44029 .m(3)
44030 .n(4)
44031 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080044032 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044033 }
44034}
44035
44036TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1_subtile) {
44037 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080044038 for (uint32_t n = 1; n <= 4; n++) {
44039 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080044040 GemmMicrokernelTester()
44041 .mr(3)
44042 .nr(4)
44043 .kr(1)
44044 .sr(1)
44045 .m(m)
44046 .n(n)
44047 .k(k)
44048 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044049 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044050 }
44051 }
44052 }
44053}
44054
44055TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4) {
44056 for (uint32_t n = 5; n < 8; n++) {
44057 for (size_t k = 1; k <= 5; k += 2) {
44058 GemmMicrokernelTester()
44059 .mr(3)
44060 .nr(4)
44061 .kr(1)
44062 .sr(1)
44063 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080044064 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080044065 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080044066 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044067 }
44068 }
44069}
44070
44071TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
44072 for (uint32_t n = 5; n < 8; n++) {
44073 for (size_t k = 1; k <= 5; k += 2) {
44074 GemmMicrokernelTester()
44075 .mr(3)
44076 .nr(4)
44077 .kr(1)
44078 .sr(1)
44079 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080044080 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080044081 .k(k)
44082 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080044083 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044084 }
44085 }
44086}
44087
44088TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_subtile) {
44089 for (uint32_t n = 5; n < 8; n++) {
44090 for (size_t k = 1; k <= 5; k += 2) {
44091 for (uint32_t m = 1; m <= 3; m++) {
44092 GemmMicrokernelTester()
44093 .mr(3)
44094 .nr(4)
44095 .kr(1)
44096 .sr(1)
44097 .m(m)
44098 .n(n)
44099 .k(k)
44100 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044101 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044102 }
44103 }
44104 }
44105}
44106
44107TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4) {
44108 for (uint32_t n = 8; n <= 12; n += 4) {
44109 for (size_t k = 1; k <= 5; k += 2) {
44110 GemmMicrokernelTester()
44111 .mr(3)
44112 .nr(4)
44113 .kr(1)
44114 .sr(1)
44115 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080044116 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080044117 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080044118 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044119 }
44120 }
44121}
44122
44123TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_strided_cn) {
44124 for (uint32_t n = 8; n <= 12; n += 4) {
44125 for (size_t k = 1; k <= 5; k += 2) {
44126 GemmMicrokernelTester()
44127 .mr(3)
44128 .nr(4)
44129 .kr(1)
44130 .sr(1)
44131 .m(3)
44132 .n(n)
44133 .k(k)
44134 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080044135 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044136 }
44137 }
44138}
44139
44140TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_subtile) {
44141 for (uint32_t n = 8; n <= 12; n += 4) {
44142 for (size_t k = 1; k <= 5; k += 2) {
44143 for (uint32_t m = 1; m <= 3; m++) {
44144 GemmMicrokernelTester()
44145 .mr(3)
44146 .nr(4)
44147 .kr(1)
44148 .sr(1)
44149 .m(m)
44150 .n(n)
44151 .k(k)
44152 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044153 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044154 }
44155 }
44156 }
44157}
44158
44159TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, small_kernel) {
44160 for (size_t k = 1; k <= 5; k += 2) {
44161 GemmMicrokernelTester()
44162 .mr(3)
44163 .nr(4)
44164 .kr(1)
44165 .sr(1)
44166 .m(3)
44167 .n(4)
44168 .k(k)
44169 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080044170 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044171 }
44172}
44173
44174TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, small_kernel_subtile) {
44175 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080044176 for (uint32_t n = 1; n <= 4; n++) {
44177 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080044178 GemmMicrokernelTester()
44179 .mr(3)
44180 .nr(4)
44181 .kr(1)
44182 .sr(1)
44183 .m(m)
44184 .n(n)
44185 .k(k)
44186 .ks(3)
44187 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044188 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044189 }
44190 }
44191 }
44192}
44193
44194TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
44195 for (uint32_t n = 5; n < 8; n++) {
44196 for (size_t k = 1; k <= 5; k += 2) {
44197 GemmMicrokernelTester()
44198 .mr(3)
44199 .nr(4)
44200 .kr(1)
44201 .sr(1)
44202 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080044203 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080044204 .k(k)
44205 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080044206 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044207 }
44208 }
44209}
44210
44211TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_small_kernel) {
44212 for (uint32_t n = 8; n <= 12; n += 4) {
44213 for (size_t k = 1; k <= 5; k += 2) {
44214 GemmMicrokernelTester()
44215 .mr(3)
44216 .nr(4)
44217 .kr(1)
44218 .sr(1)
44219 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080044220 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080044221 .k(k)
44222 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080044223 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044224 }
44225 }
44226}
44227
44228TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm_subtile) {
44229 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080044230 for (uint32_t n = 1; n <= 4; n++) {
44231 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080044232 GemmMicrokernelTester()
44233 .mr(3)
44234 .nr(4)
44235 .kr(1)
44236 .sr(1)
44237 .m(m)
44238 .n(n)
44239 .k(k)
44240 .cm_stride(7)
44241 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044242 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044243 }
44244 }
44245 }
44246}
44247
44248TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, a_offset) {
44249 for (size_t k = 1; k <= 5; k += 2) {
44250 GemmMicrokernelTester()
44251 .mr(3)
44252 .nr(4)
44253 .kr(1)
44254 .sr(1)
44255 .m(3)
44256 .n(4)
44257 .k(k)
44258 .ks(3)
44259 .a_offset(17)
Marat Dukhan50323b82022-01-11 00:12:01 -080044260 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044261 }
44262}
44263
44264TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080044265 for (size_t k = 1; k <= 5; k += 2) {
44266 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080044267 GemmMicrokernelTester()
44268 .mr(3)
44269 .nr(4)
44270 .kr(1)
44271 .sr(1)
44272 .m(3)
44273 .n(4)
44274 .k(k)
44275 .ks(3)
44276 .a_offset(17)
44277 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080044278 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044279 }
44280 }
44281}
44282
44283TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmin) {
44284 GemmMicrokernelTester()
44285 .mr(3)
44286 .nr(4)
44287 .kr(1)
44288 .sr(1)
44289 .m(3)
44290 .n(4)
44291 .k(1)
44292 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080044293 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044294}
44295
44296TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmax) {
44297 GemmMicrokernelTester()
44298 .mr(3)
44299 .nr(4)
44300 .kr(1)
44301 .sr(1)
44302 .m(3)
44303 .n(4)
44304 .k(1)
44305 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080044306 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044307}
44308
44309TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm) {
44310 GemmMicrokernelTester()
44311 .mr(3)
44312 .nr(4)
44313 .kr(1)
44314 .sr(1)
44315 .m(3)
44316 .n(4)
44317 .k(1)
44318 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080044319 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044320}
44321
44322
44323TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1) {
44324 GemmMicrokernelTester()
44325 .mr(4)
44326 .nr(4)
44327 .kr(1)
44328 .sr(1)
44329 .m(4)
44330 .n(4)
44331 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044332 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044333}
44334
44335TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cn) {
44336 GemmMicrokernelTester()
44337 .mr(4)
44338 .nr(4)
44339 .kr(1)
44340 .sr(1)
44341 .m(4)
44342 .n(4)
44343 .k(1)
44344 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080044345 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044346}
44347
44348TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080044349 for (uint32_t n = 1; n <= 4; n++) {
44350 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080044351 GemmMicrokernelTester()
44352 .mr(4)
44353 .nr(4)
44354 .kr(1)
44355 .sr(1)
44356 .m(m)
44357 .n(n)
44358 .k(1)
44359 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044360 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044361 }
44362 }
44363}
44364
44365TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
44366 for (uint32_t m = 1; m <= 4; m++) {
44367 GemmMicrokernelTester()
44368 .mr(4)
44369 .nr(4)
44370 .kr(1)
44371 .sr(1)
44372 .m(m)
44373 .n(4)
44374 .k(1)
44375 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044376 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044377 }
44378}
44379
44380TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
44381 for (uint32_t n = 1; n <= 4; n++) {
44382 GemmMicrokernelTester()
44383 .mr(4)
44384 .nr(4)
44385 .kr(1)
44386 .sr(1)
44387 .m(4)
44388 .n(n)
44389 .k(1)
44390 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044391 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044392 }
44393}
44394
44395TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1) {
44396 for (size_t k = 2; k < 10; k++) {
44397 GemmMicrokernelTester()
44398 .mr(4)
44399 .nr(4)
44400 .kr(1)
44401 .sr(1)
44402 .m(4)
44403 .n(4)
44404 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080044405 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044406 }
44407}
44408
44409TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1_subtile) {
44410 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080044411 for (uint32_t n = 1; n <= 4; n++) {
44412 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080044413 GemmMicrokernelTester()
44414 .mr(4)
44415 .nr(4)
44416 .kr(1)
44417 .sr(1)
44418 .m(m)
44419 .n(n)
44420 .k(k)
44421 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044422 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044423 }
44424 }
44425 }
44426}
44427
44428TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4) {
44429 for (uint32_t n = 5; n < 8; n++) {
44430 for (size_t k = 1; k <= 5; k += 2) {
44431 GemmMicrokernelTester()
44432 .mr(4)
44433 .nr(4)
44434 .kr(1)
44435 .sr(1)
44436 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080044437 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080044438 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080044439 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044440 }
44441 }
44442}
44443
44444TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
44445 for (uint32_t n = 5; n < 8; n++) {
44446 for (size_t k = 1; k <= 5; k += 2) {
44447 GemmMicrokernelTester()
44448 .mr(4)
44449 .nr(4)
44450 .kr(1)
44451 .sr(1)
44452 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080044453 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080044454 .k(k)
44455 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080044456 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044457 }
44458 }
44459}
44460
44461TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_subtile) {
44462 for (uint32_t n = 5; n < 8; n++) {
44463 for (size_t k = 1; k <= 5; k += 2) {
44464 for (uint32_t m = 1; m <= 4; m++) {
44465 GemmMicrokernelTester()
44466 .mr(4)
44467 .nr(4)
44468 .kr(1)
44469 .sr(1)
44470 .m(m)
44471 .n(n)
44472 .k(k)
44473 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044474 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044475 }
44476 }
44477 }
44478}
44479
44480TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4) {
44481 for (uint32_t n = 8; n <= 12; n += 4) {
44482 for (size_t k = 1; k <= 5; k += 2) {
44483 GemmMicrokernelTester()
44484 .mr(4)
44485 .nr(4)
44486 .kr(1)
44487 .sr(1)
44488 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080044489 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080044490 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080044491 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044492 }
44493 }
44494}
44495
44496TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_strided_cn) {
44497 for (uint32_t n = 8; n <= 12; n += 4) {
44498 for (size_t k = 1; k <= 5; k += 2) {
44499 GemmMicrokernelTester()
44500 .mr(4)
44501 .nr(4)
44502 .kr(1)
44503 .sr(1)
44504 .m(4)
44505 .n(n)
44506 .k(k)
44507 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080044508 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044509 }
44510 }
44511}
44512
44513TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_subtile) {
44514 for (uint32_t n = 8; n <= 12; n += 4) {
44515 for (size_t k = 1; k <= 5; k += 2) {
44516 for (uint32_t m = 1; m <= 4; m++) {
44517 GemmMicrokernelTester()
44518 .mr(4)
44519 .nr(4)
44520 .kr(1)
44521 .sr(1)
44522 .m(m)
44523 .n(n)
44524 .k(k)
44525 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044526 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044527 }
44528 }
44529 }
44530}
44531
44532TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, small_kernel) {
44533 for (size_t k = 1; k <= 5; k += 2) {
44534 GemmMicrokernelTester()
44535 .mr(4)
44536 .nr(4)
44537 .kr(1)
44538 .sr(1)
44539 .m(4)
44540 .n(4)
44541 .k(k)
44542 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080044543 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044544 }
44545}
44546
44547TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, small_kernel_subtile) {
44548 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080044549 for (uint32_t n = 1; n <= 4; n++) {
44550 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080044551 GemmMicrokernelTester()
44552 .mr(4)
44553 .nr(4)
44554 .kr(1)
44555 .sr(1)
44556 .m(m)
44557 .n(n)
44558 .k(k)
44559 .ks(3)
44560 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044561 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044562 }
44563 }
44564 }
44565}
44566
44567TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
44568 for (uint32_t n = 5; n < 8; n++) {
44569 for (size_t k = 1; k <= 5; k += 2) {
44570 GemmMicrokernelTester()
44571 .mr(4)
44572 .nr(4)
44573 .kr(1)
44574 .sr(1)
44575 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080044576 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080044577 .k(k)
44578 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080044579 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044580 }
44581 }
44582}
44583
44584TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_small_kernel) {
44585 for (uint32_t n = 8; n <= 12; n += 4) {
44586 for (size_t k = 1; k <= 5; k += 2) {
44587 GemmMicrokernelTester()
44588 .mr(4)
44589 .nr(4)
44590 .kr(1)
44591 .sr(1)
44592 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080044593 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080044594 .k(k)
44595 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080044596 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044597 }
44598 }
44599}
44600
44601TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm_subtile) {
44602 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080044603 for (uint32_t n = 1; n <= 4; n++) {
44604 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080044605 GemmMicrokernelTester()
44606 .mr(4)
44607 .nr(4)
44608 .kr(1)
44609 .sr(1)
44610 .m(m)
44611 .n(n)
44612 .k(k)
44613 .cm_stride(7)
44614 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044615 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044616 }
44617 }
44618 }
44619}
44620
44621TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, a_offset) {
44622 for (size_t k = 1; k <= 5; k += 2) {
44623 GemmMicrokernelTester()
44624 .mr(4)
44625 .nr(4)
44626 .kr(1)
44627 .sr(1)
44628 .m(4)
44629 .n(4)
44630 .k(k)
44631 .ks(3)
44632 .a_offset(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080044633 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044634 }
44635}
44636
44637TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080044638 for (size_t k = 1; k <= 5; k += 2) {
44639 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080044640 GemmMicrokernelTester()
44641 .mr(4)
44642 .nr(4)
44643 .kr(1)
44644 .sr(1)
44645 .m(4)
44646 .n(4)
44647 .k(k)
44648 .ks(3)
44649 .a_offset(23)
44650 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080044651 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044652 }
44653 }
44654}
44655
44656TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmin) {
44657 GemmMicrokernelTester()
44658 .mr(4)
44659 .nr(4)
44660 .kr(1)
44661 .sr(1)
44662 .m(4)
44663 .n(4)
44664 .k(1)
44665 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080044666 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044667}
44668
44669TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmax) {
44670 GemmMicrokernelTester()
44671 .mr(4)
44672 .nr(4)
44673 .kr(1)
44674 .sr(1)
44675 .m(4)
44676 .n(4)
44677 .k(1)
44678 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080044679 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044680}
44681
44682TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm) {
44683 GemmMicrokernelTester()
44684 .mr(4)
44685 .nr(4)
44686 .kr(1)
44687 .sr(1)
44688 .m(4)
44689 .n(4)
44690 .k(1)
44691 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080044692 .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080044693}