blob: 51740a9a92eac042bdffe85032d982fbae425998 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/qs8-gemm-minmax-fp32.yaml
// Generator: tools/generate-gemm-test.py
12
13
14#include <gtest/gtest.h>
15
Frank Barchard447aa7b2021-12-28 14:11:40 -080016#include <xnnpack/allocator.h>
Marat Dukhan9b474cf2021-05-25 16:37:48 -070017#include <xnnpack/common.h>
18#include <xnnpack/isa-checks.h>
19
20#include <xnnpack/gemm.h>
21#include <xnnpack/igemm.h>
22#include <xnnpack/ppmm.h>
23#include "gemm-microkernel-tester.h"
24
25
Frank Barchard287952a2021-11-03 15:26:45 -070026#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard15eec022021-11-17 13:26:20 -080027 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_eq_16) {
28 TEST_REQUIRES_ARM_NEON;
29 GemmMicrokernelTester()
30 .mr(2)
31 .nr(8)
32 .kr(2)
33 .sr(1)
34 .m(2)
35 .n(8)
36 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080037 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -080038 }
39
40 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, strided_cn) {
41 TEST_REQUIRES_ARM_NEON;
42 GemmMicrokernelTester()
43 .mr(2)
44 .nr(8)
45 .kr(2)
46 .sr(1)
47 .m(2)
48 .n(8)
49 .k(16)
50 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080051 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -080052 }
53
54 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_eq_16_strided_a) {
55 TEST_REQUIRES_ARM_NEON;
56 GemmMicrokernelTester()
57 .mr(2)
58 .nr(8)
59 .kr(2)
60 .sr(1)
61 .m(2)
62 .n(8)
63 .k(16)
64 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080065 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -080066 }
67
68 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile) {
69 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080070 for (uint32_t n = 1; n <= 8; n++) {
71 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -080072 GemmMicrokernelTester()
73 .mr(2)
74 .nr(8)
75 .kr(2)
76 .sr(1)
77 .m(m)
78 .n(n)
79 .k(16)
80 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080081 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -080082 }
83 }
84 }
85
86 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
87 TEST_REQUIRES_ARM_NEON;
88 for (uint32_t m = 1; m <= 2; m++) {
89 GemmMicrokernelTester()
90 .mr(2)
91 .nr(8)
92 .kr(2)
93 .sr(1)
94 .m(m)
95 .n(8)
96 .k(16)
97 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080098 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -080099 }
100 }
101
102 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
103 TEST_REQUIRES_ARM_NEON;
104 for (uint32_t n = 1; n <= 8; n++) {
105 GemmMicrokernelTester()
106 .mr(2)
107 .nr(8)
108 .kr(2)
109 .sr(1)
110 .m(2)
111 .n(n)
112 .k(16)
113 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800114 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800115 }
116 }
117
118 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_lt_16) {
119 TEST_REQUIRES_ARM_NEON;
120 for (size_t k = 1; k < 16; k++) {
121 GemmMicrokernelTester()
122 .mr(2)
123 .nr(8)
124 .kr(2)
125 .sr(1)
126 .m(2)
127 .n(8)
128 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800129 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800130 }
131 }
132
133 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_lt_16_strided_a) {
134 TEST_REQUIRES_ARM_NEON;
135 for (size_t k = 1; k < 16; k++) {
136 GemmMicrokernelTester()
137 .mr(2)
138 .nr(8)
139 .kr(2)
140 .sr(1)
141 .m(2)
142 .n(8)
143 .k(k)
144 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -0800145 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800146 }
147 }
148
149 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_lt_16_subtile) {
150 TEST_REQUIRES_ARM_NEON;
151 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800152 for (uint32_t n = 1; n <= 8; n++) {
153 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -0800154 GemmMicrokernelTester()
155 .mr(2)
156 .nr(8)
157 .kr(2)
158 .sr(1)
159 .m(m)
160 .n(n)
161 .k(k)
162 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800163 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800164 }
165 }
166 }
167 }
168
169 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_gt_16) {
170 TEST_REQUIRES_ARM_NEON;
171 for (size_t k = 17; k < 32; k++) {
172 GemmMicrokernelTester()
173 .mr(2)
174 .nr(8)
175 .kr(2)
176 .sr(1)
177 .m(2)
178 .n(8)
179 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800180 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800181 }
182 }
183
184 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_gt_16_strided_a) {
185 TEST_REQUIRES_ARM_NEON;
186 for (size_t k = 17; k < 32; k++) {
187 GemmMicrokernelTester()
188 .mr(2)
189 .nr(8)
190 .kr(2)
191 .sr(1)
192 .m(2)
193 .n(8)
194 .k(k)
195 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -0800196 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800197 }
198 }
199
200 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_gt_16_subtile) {
201 TEST_REQUIRES_ARM_NEON;
202 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800203 for (uint32_t n = 1; n <= 8; n++) {
204 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -0800205 GemmMicrokernelTester()
206 .mr(2)
207 .nr(8)
208 .kr(2)
209 .sr(1)
210 .m(m)
211 .n(n)
212 .k(k)
213 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800214 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800215 }
216 }
217 }
218 }
219
220 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_div_16) {
221 TEST_REQUIRES_ARM_NEON;
222 for (size_t k = 32; k <= 160; k += 16) {
223 GemmMicrokernelTester()
224 .mr(2)
225 .nr(8)
226 .kr(2)
227 .sr(1)
228 .m(2)
229 .n(8)
230 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800231 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800232 }
233 }
234
235 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_div_16_strided_a) {
236 TEST_REQUIRES_ARM_NEON;
237 for (size_t k = 32; k <= 160; k += 16) {
238 GemmMicrokernelTester()
239 .mr(2)
240 .nr(8)
241 .kr(2)
242 .sr(1)
243 .m(2)
244 .n(8)
245 .k(k)
246 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -0800247 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800248 }
249 }
250
251 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_div_16_subtile) {
252 TEST_REQUIRES_ARM_NEON;
253 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800254 for (uint32_t n = 1; n <= 8; n++) {
255 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -0800256 GemmMicrokernelTester()
257 .mr(2)
258 .nr(8)
259 .kr(2)
260 .sr(1)
261 .m(m)
262 .n(n)
263 .k(k)
264 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800265 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800266 }
267 }
268 }
269 }
270
271 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_gt_8) {
272 TEST_REQUIRES_ARM_NEON;
273 for (uint32_t n = 9; n < 16; n++) {
274 for (size_t k = 1; k <= 80; k += 17) {
275 GemmMicrokernelTester()
276 .mr(2)
277 .nr(8)
278 .kr(2)
279 .sr(1)
280 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800281 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -0800282 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800283 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800284 }
285 }
286 }
287
288 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
289 TEST_REQUIRES_ARM_NEON;
290 for (uint32_t n = 9; n < 16; n++) {
291 for (size_t k = 1; k <= 80; k += 17) {
292 GemmMicrokernelTester()
293 .mr(2)
294 .nr(8)
295 .kr(2)
296 .sr(1)
297 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800298 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -0800299 .k(k)
300 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800301 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800302 }
303 }
304 }
305
306 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_gt_8_strided_a) {
307 TEST_REQUIRES_ARM_NEON;
308 for (uint32_t n = 9; n < 16; n++) {
309 for (size_t k = 1; k <= 80; k += 17) {
310 GemmMicrokernelTester()
311 .mr(2)
312 .nr(8)
313 .kr(2)
314 .sr(1)
315 .m(2)
316 .n(n)
317 .k(k)
318 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -0800319 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800320 }
321 }
322 }
323
324 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_gt_8_subtile) {
325 TEST_REQUIRES_ARM_NEON;
326 for (uint32_t n = 9; n < 16; n++) {
327 for (size_t k = 1; k <= 80; k += 17) {
328 for (uint32_t m = 1; m <= 2; m++) {
329 GemmMicrokernelTester()
330 .mr(2)
331 .nr(8)
332 .kr(2)
333 .sr(1)
334 .m(m)
335 .n(n)
336 .k(k)
337 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800338 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800339 }
340 }
341 }
342 }
343
344 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_div_8) {
345 TEST_REQUIRES_ARM_NEON;
346 for (uint32_t n = 16; n <= 24; n += 8) {
347 for (size_t k = 1; k <= 80; k += 17) {
348 GemmMicrokernelTester()
349 .mr(2)
350 .nr(8)
351 .kr(2)
352 .sr(1)
353 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800354 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -0800355 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800356 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800357 }
358 }
359 }
360
361 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_div_8_strided_cn) {
362 TEST_REQUIRES_ARM_NEON;
363 for (uint32_t n = 16; n <= 24; n += 8) {
364 for (size_t k = 1; k <= 80; k += 17) {
365 GemmMicrokernelTester()
366 .mr(2)
367 .nr(8)
368 .kr(2)
369 .sr(1)
370 .m(2)
371 .n(n)
372 .k(k)
373 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800374 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800375 }
376 }
377 }
378
379 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_div_8_strided_a) {
380 TEST_REQUIRES_ARM_NEON;
381 for (uint32_t n = 16; n <= 24; n += 8) {
382 for (size_t k = 1; k <= 80; k += 17) {
383 GemmMicrokernelTester()
384 .mr(2)
385 .nr(8)
386 .kr(2)
387 .sr(1)
388 .m(2)
389 .n(n)
390 .k(k)
391 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -0800392 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800393 }
394 }
395 }
396
397 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_div_8_subtile) {
398 TEST_REQUIRES_ARM_NEON;
399 for (uint32_t n = 16; n <= 24; n += 8) {
400 for (size_t k = 1; k <= 80; k += 17) {
401 for (uint32_t m = 1; m <= 2; m++) {
402 GemmMicrokernelTester()
403 .mr(2)
404 .nr(8)
405 .kr(2)
406 .sr(1)
407 .m(m)
408 .n(n)
409 .k(k)
410 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800411 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800412 }
413 }
414 }
415 }
416
417 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, strided_cm_subtile) {
418 TEST_REQUIRES_ARM_NEON;
419 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800420 for (uint32_t n = 1; n <= 8; n++) {
421 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -0800422 GemmMicrokernelTester()
423 .mr(2)
424 .nr(8)
425 .kr(2)
426 .sr(1)
427 .m(m)
428 .n(n)
429 .k(k)
430 .cm_stride(11)
431 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800432 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800433 }
434 }
435 }
436 }
437
438 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, qmin) {
439 TEST_REQUIRES_ARM_NEON;
440 GemmMicrokernelTester()
441 .mr(2)
442 .nr(8)
443 .kr(2)
444 .sr(1)
445 .m(2)
446 .n(8)
447 .k(16)
448 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800449 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800450 }
451
452 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, qmax) {
453 TEST_REQUIRES_ARM_NEON;
454 GemmMicrokernelTester()
455 .mr(2)
456 .nr(8)
457 .kr(2)
458 .sr(1)
459 .m(2)
460 .n(8)
461 .k(16)
462 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800463 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800464 }
465
466 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, strided_cm) {
467 TEST_REQUIRES_ARM_NEON;
468 GemmMicrokernelTester()
469 .mr(2)
470 .nr(8)
471 .kr(2)
472 .sr(1)
473 .m(2)
474 .n(8)
475 .k(16)
476 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800477 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800478 }
479#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
480
481
482#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard15eec022021-11-17 13:26:20 -0800483 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_eq_16) {
484 TEST_REQUIRES_ARM_NEON_V8;
485 GemmMicrokernelTester()
486 .mr(2)
487 .nr(8)
488 .kr(2)
489 .sr(1)
490 .m(2)
491 .n(8)
492 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -0800493 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800494 }
495
496 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, strided_cn) {
497 TEST_REQUIRES_ARM_NEON_V8;
498 GemmMicrokernelTester()
499 .mr(2)
500 .nr(8)
501 .kr(2)
502 .sr(1)
503 .m(2)
504 .n(8)
505 .k(16)
506 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800507 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800508 }
509
510 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_eq_16_strided_a) {
511 TEST_REQUIRES_ARM_NEON_V8;
512 GemmMicrokernelTester()
513 .mr(2)
514 .nr(8)
515 .kr(2)
516 .sr(1)
517 .m(2)
518 .n(8)
519 .k(16)
520 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -0800521 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800522 }
523
524 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_eq_16_subtile) {
525 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800526 for (uint32_t n = 1; n <= 8; n++) {
527 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -0800528 GemmMicrokernelTester()
529 .mr(2)
530 .nr(8)
531 .kr(2)
532 .sr(1)
533 .m(m)
534 .n(n)
535 .k(16)
536 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800537 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800538 }
539 }
540 }
541
542 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_eq_16_subtile_m) {
543 TEST_REQUIRES_ARM_NEON_V8;
544 for (uint32_t m = 1; m <= 2; m++) {
545 GemmMicrokernelTester()
546 .mr(2)
547 .nr(8)
548 .kr(2)
549 .sr(1)
550 .m(m)
551 .n(8)
552 .k(16)
553 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800554 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800555 }
556 }
557
558 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_eq_16_subtile_n) {
559 TEST_REQUIRES_ARM_NEON_V8;
560 for (uint32_t n = 1; n <= 8; n++) {
561 GemmMicrokernelTester()
562 .mr(2)
563 .nr(8)
564 .kr(2)
565 .sr(1)
566 .m(2)
567 .n(n)
568 .k(16)
569 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800570 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800571 }
572 }
573
574 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_lt_16) {
575 TEST_REQUIRES_ARM_NEON_V8;
576 for (size_t k = 1; k < 16; k++) {
577 GemmMicrokernelTester()
578 .mr(2)
579 .nr(8)
580 .kr(2)
581 .sr(1)
582 .m(2)
583 .n(8)
584 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800585 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800586 }
587 }
588
589 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_lt_16_strided_a) {
590 TEST_REQUIRES_ARM_NEON_V8;
591 for (size_t k = 1; k < 16; k++) {
592 GemmMicrokernelTester()
593 .mr(2)
594 .nr(8)
595 .kr(2)
596 .sr(1)
597 .m(2)
598 .n(8)
599 .k(k)
600 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -0800601 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800602 }
603 }
604
605 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_lt_16_subtile) {
606 TEST_REQUIRES_ARM_NEON_V8;
607 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800608 for (uint32_t n = 1; n <= 8; n++) {
609 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -0800610 GemmMicrokernelTester()
611 .mr(2)
612 .nr(8)
613 .kr(2)
614 .sr(1)
615 .m(m)
616 .n(n)
617 .k(k)
618 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800619 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800620 }
621 }
622 }
623 }
624
625 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_gt_16) {
626 TEST_REQUIRES_ARM_NEON_V8;
627 for (size_t k = 17; k < 32; k++) {
628 GemmMicrokernelTester()
629 .mr(2)
630 .nr(8)
631 .kr(2)
632 .sr(1)
633 .m(2)
634 .n(8)
635 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800636 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800637 }
638 }
639
640 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_gt_16_strided_a) {
641 TEST_REQUIRES_ARM_NEON_V8;
642 for (size_t k = 17; k < 32; k++) {
643 GemmMicrokernelTester()
644 .mr(2)
645 .nr(8)
646 .kr(2)
647 .sr(1)
648 .m(2)
649 .n(8)
650 .k(k)
651 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -0800652 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800653 }
654 }
655
656 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_gt_16_subtile) {
657 TEST_REQUIRES_ARM_NEON_V8;
658 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800659 for (uint32_t n = 1; n <= 8; n++) {
660 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -0800661 GemmMicrokernelTester()
662 .mr(2)
663 .nr(8)
664 .kr(2)
665 .sr(1)
666 .m(m)
667 .n(n)
668 .k(k)
669 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800670 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800671 }
672 }
673 }
674 }
675
676 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_div_16) {
677 TEST_REQUIRES_ARM_NEON_V8;
678 for (size_t k = 32; k <= 160; k += 16) {
679 GemmMicrokernelTester()
680 .mr(2)
681 .nr(8)
682 .kr(2)
683 .sr(1)
684 .m(2)
685 .n(8)
686 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800687 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800688 }
689 }
690
691 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_div_16_strided_a) {
692 TEST_REQUIRES_ARM_NEON_V8;
693 for (size_t k = 32; k <= 160; k += 16) {
694 GemmMicrokernelTester()
695 .mr(2)
696 .nr(8)
697 .kr(2)
698 .sr(1)
699 .m(2)
700 .n(8)
701 .k(k)
702 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -0800703 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800704 }
705 }
706
707 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_div_16_subtile) {
708 TEST_REQUIRES_ARM_NEON_V8;
709 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800710 for (uint32_t n = 1; n <= 8; n++) {
711 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -0800712 GemmMicrokernelTester()
713 .mr(2)
714 .nr(8)
715 .kr(2)
716 .sr(1)
717 .m(m)
718 .n(n)
719 .k(k)
720 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800721 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800722 }
723 }
724 }
725 }
726
727 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_gt_8) {
728 TEST_REQUIRES_ARM_NEON_V8;
729 for (uint32_t n = 9; n < 16; n++) {
730 for (size_t k = 1; k <= 80; k += 17) {
731 GemmMicrokernelTester()
732 .mr(2)
733 .nr(8)
734 .kr(2)
735 .sr(1)
736 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800737 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -0800738 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800739 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800740 }
741 }
742 }
743
744 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_gt_8_strided_cn) {
745 TEST_REQUIRES_ARM_NEON_V8;
746 for (uint32_t n = 9; n < 16; n++) {
747 for (size_t k = 1; k <= 80; k += 17) {
748 GemmMicrokernelTester()
749 .mr(2)
750 .nr(8)
751 .kr(2)
752 .sr(1)
753 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800754 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -0800755 .k(k)
756 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800757 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800758 }
759 }
760 }
761
762 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_gt_8_strided_a) {
763 TEST_REQUIRES_ARM_NEON_V8;
764 for (uint32_t n = 9; n < 16; n++) {
765 for (size_t k = 1; k <= 80; k += 17) {
766 GemmMicrokernelTester()
767 .mr(2)
768 .nr(8)
769 .kr(2)
770 .sr(1)
771 .m(2)
772 .n(n)
773 .k(k)
774 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -0800775 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800776 }
777 }
778 }
779
780 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_gt_8_subtile) {
781 TEST_REQUIRES_ARM_NEON_V8;
782 for (uint32_t n = 9; n < 16; n++) {
783 for (size_t k = 1; k <= 80; k += 17) {
784 for (uint32_t m = 1; m <= 2; m++) {
785 GemmMicrokernelTester()
786 .mr(2)
787 .nr(8)
788 .kr(2)
789 .sr(1)
790 .m(m)
791 .n(n)
792 .k(k)
793 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800794 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800795 }
796 }
797 }
798 }
799
800 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_div_8) {
801 TEST_REQUIRES_ARM_NEON_V8;
802 for (uint32_t n = 16; n <= 24; n += 8) {
803 for (size_t k = 1; k <= 80; k += 17) {
804 GemmMicrokernelTester()
805 .mr(2)
806 .nr(8)
807 .kr(2)
808 .sr(1)
809 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800810 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -0800811 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800812 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800813 }
814 }
815 }
816
817 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_div_8_strided_cn) {
818 TEST_REQUIRES_ARM_NEON_V8;
819 for (uint32_t n = 16; n <= 24; n += 8) {
820 for (size_t k = 1; k <= 80; k += 17) {
821 GemmMicrokernelTester()
822 .mr(2)
823 .nr(8)
824 .kr(2)
825 .sr(1)
826 .m(2)
827 .n(n)
828 .k(k)
829 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800830 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800831 }
832 }
833 }
834
835 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_div_8_strided_a) {
836 TEST_REQUIRES_ARM_NEON_V8;
837 for (uint32_t n = 16; n <= 24; n += 8) {
838 for (size_t k = 1; k <= 80; k += 17) {
839 GemmMicrokernelTester()
840 .mr(2)
841 .nr(8)
842 .kr(2)
843 .sr(1)
844 .m(2)
845 .n(n)
846 .k(k)
847 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -0800848 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800849 }
850 }
851 }
852
853 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_div_8_subtile) {
854 TEST_REQUIRES_ARM_NEON_V8;
855 for (uint32_t n = 16; n <= 24; n += 8) {
856 for (size_t k = 1; k <= 80; k += 17) {
857 for (uint32_t m = 1; m <= 2; m++) {
858 GemmMicrokernelTester()
859 .mr(2)
860 .nr(8)
861 .kr(2)
862 .sr(1)
863 .m(m)
864 .n(n)
865 .k(k)
866 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800867 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800868 }
869 }
870 }
871 }
872
873 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, strided_cm_subtile) {
874 TEST_REQUIRES_ARM_NEON_V8;
875 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800876 for (uint32_t n = 1; n <= 8; n++) {
877 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -0800878 GemmMicrokernelTester()
879 .mr(2)
880 .nr(8)
881 .kr(2)
882 .sr(1)
883 .m(m)
884 .n(n)
885 .k(k)
886 .cm_stride(11)
887 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800888 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800889 }
890 }
891 }
892 }
893
894 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, qmin) {
895 TEST_REQUIRES_ARM_NEON_V8;
896 GemmMicrokernelTester()
897 .mr(2)
898 .nr(8)
899 .kr(2)
900 .sr(1)
901 .m(2)
902 .n(8)
903 .k(16)
904 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800905 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800906 }
907
908 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, qmax) {
909 TEST_REQUIRES_ARM_NEON_V8;
910 GemmMicrokernelTester()
911 .mr(2)
912 .nr(8)
913 .kr(2)
914 .sr(1)
915 .m(2)
916 .n(8)
917 .k(16)
918 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800919 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800920 }
921
922 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, strided_cm) {
923 TEST_REQUIRES_ARM_NEON_V8;
924 GemmMicrokernelTester()
925 .mr(2)
926 .nr(8)
927 .kr(2)
928 .sr(1)
929 .m(2)
930 .n(8)
931 .k(16)
932 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800933 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800934 }
935#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
936
937
938#if XNN_ARCH_ARM || XNN_ARCH_ARM64
939 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16) {
940 TEST_REQUIRES_ARM_NEON;
941 GemmMicrokernelTester()
942 .mr(1)
943 .nr(8)
944 .kr(2)
945 .sr(1)
946 .m(1)
947 .n(8)
948 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -0800949 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800950 }
951
952 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, strided_cn) {
953 TEST_REQUIRES_ARM_NEON;
954 GemmMicrokernelTester()
955 .mr(1)
956 .nr(8)
957 .kr(2)
958 .sr(1)
959 .m(1)
960 .n(8)
961 .k(16)
962 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800963 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800964 }
965
966 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16_strided_a) {
967 TEST_REQUIRES_ARM_NEON;
968 GemmMicrokernelTester()
969 .mr(1)
970 .nr(8)
971 .kr(2)
972 .sr(1)
973 .m(1)
974 .n(8)
975 .k(16)
976 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -0800977 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800978 }
979
980 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile) {
981 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800982 for (uint32_t n = 1; n <= 8; n++) {
983 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -0800984 GemmMicrokernelTester()
985 .mr(1)
986 .nr(8)
987 .kr(2)
988 .sr(1)
989 .m(m)
990 .n(n)
991 .k(16)
992 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800993 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -0800994 }
995 }
996 }
997
998 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
999 TEST_REQUIRES_ARM_NEON;
1000 for (uint32_t m = 1; m <= 1; m++) {
1001 GemmMicrokernelTester()
1002 .mr(1)
1003 .nr(8)
1004 .kr(2)
1005 .sr(1)
1006 .m(m)
1007 .n(8)
1008 .k(16)
1009 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001010 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001011 }
1012 }
1013
1014 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
1015 TEST_REQUIRES_ARM_NEON;
1016 for (uint32_t n = 1; n <= 8; n++) {
1017 GemmMicrokernelTester()
1018 .mr(1)
1019 .nr(8)
1020 .kr(2)
1021 .sr(1)
1022 .m(1)
1023 .n(n)
1024 .k(16)
1025 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001026 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001027 }
1028 }
1029
1030 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_lt_16) {
1031 TEST_REQUIRES_ARM_NEON;
1032 for (size_t k = 1; k < 16; k++) {
1033 GemmMicrokernelTester()
1034 .mr(1)
1035 .nr(8)
1036 .kr(2)
1037 .sr(1)
1038 .m(1)
1039 .n(8)
1040 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001041 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001042 }
1043 }
1044
1045 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_lt_16_strided_a) {
1046 TEST_REQUIRES_ARM_NEON;
1047 for (size_t k = 1; k < 16; k++) {
1048 GemmMicrokernelTester()
1049 .mr(1)
1050 .nr(8)
1051 .kr(2)
1052 .sr(1)
1053 .m(1)
1054 .n(8)
1055 .k(k)
1056 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001057 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001058 }
1059 }
1060
1061 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_lt_16_subtile) {
1062 TEST_REQUIRES_ARM_NEON;
1063 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001064 for (uint32_t n = 1; n <= 8; n++) {
1065 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -08001066 GemmMicrokernelTester()
1067 .mr(1)
1068 .nr(8)
1069 .kr(2)
1070 .sr(1)
1071 .m(m)
1072 .n(n)
1073 .k(k)
1074 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001075 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001076 }
1077 }
1078 }
1079 }
1080
1081 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_gt_16) {
1082 TEST_REQUIRES_ARM_NEON;
1083 for (size_t k = 17; k < 32; k++) {
1084 GemmMicrokernelTester()
1085 .mr(1)
1086 .nr(8)
1087 .kr(2)
1088 .sr(1)
1089 .m(1)
1090 .n(8)
1091 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001092 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001093 }
1094 }
1095
1096 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_gt_16_strided_a) {
1097 TEST_REQUIRES_ARM_NEON;
1098 for (size_t k = 17; k < 32; k++) {
1099 GemmMicrokernelTester()
1100 .mr(1)
1101 .nr(8)
1102 .kr(2)
1103 .sr(1)
1104 .m(1)
1105 .n(8)
1106 .k(k)
1107 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08001108 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001109 }
1110 }
1111
1112 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_gt_16_subtile) {
1113 TEST_REQUIRES_ARM_NEON;
1114 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001115 for (uint32_t n = 1; n <= 8; n++) {
1116 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -08001117 GemmMicrokernelTester()
1118 .mr(1)
1119 .nr(8)
1120 .kr(2)
1121 .sr(1)
1122 .m(m)
1123 .n(n)
1124 .k(k)
1125 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001126 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001127 }
1128 }
1129 }
1130 }
1131
1132 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_div_16) {
1133 TEST_REQUIRES_ARM_NEON;
1134 for (size_t k = 32; k <= 160; k += 16) {
1135 GemmMicrokernelTester()
1136 .mr(1)
1137 .nr(8)
1138 .kr(2)
1139 .sr(1)
1140 .m(1)
1141 .n(8)
1142 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001143 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001144 }
1145 }
1146
1147 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_div_16_strided_a) {
1148 TEST_REQUIRES_ARM_NEON;
1149 for (size_t k = 32; k <= 160; k += 16) {
1150 GemmMicrokernelTester()
1151 .mr(1)
1152 .nr(8)
1153 .kr(2)
1154 .sr(1)
1155 .m(1)
1156 .n(8)
1157 .k(k)
1158 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08001159 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001160 }
1161 }
1162
1163 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_div_16_subtile) {
1164 TEST_REQUIRES_ARM_NEON;
1165 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001166 for (uint32_t n = 1; n <= 8; n++) {
1167 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -08001168 GemmMicrokernelTester()
1169 .mr(1)
1170 .nr(8)
1171 .kr(2)
1172 .sr(1)
1173 .m(m)
1174 .n(n)
1175 .k(k)
1176 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001177 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001178 }
1179 }
1180 }
1181 }
1182
1183 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8) {
1184 TEST_REQUIRES_ARM_NEON;
1185 for (uint32_t n = 9; n < 16; n++) {
1186 for (size_t k = 1; k <= 80; k += 17) {
1187 GemmMicrokernelTester()
1188 .mr(1)
1189 .nr(8)
1190 .kr(2)
1191 .sr(1)
1192 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001193 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -08001194 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001195 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001196 }
1197 }
1198 }
1199
1200 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
1201 TEST_REQUIRES_ARM_NEON;
1202 for (uint32_t n = 9; n < 16; n++) {
1203 for (size_t k = 1; k <= 80; k += 17) {
1204 GemmMicrokernelTester()
1205 .mr(1)
1206 .nr(8)
1207 .kr(2)
1208 .sr(1)
1209 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001210 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -08001211 .k(k)
1212 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001213 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001214 }
1215 }
1216 }
1217
1218 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8_strided_a) {
1219 TEST_REQUIRES_ARM_NEON;
1220 for (uint32_t n = 9; n < 16; n++) {
1221 for (size_t k = 1; k <= 80; k += 17) {
1222 GemmMicrokernelTester()
1223 .mr(1)
1224 .nr(8)
1225 .kr(2)
1226 .sr(1)
1227 .m(1)
1228 .n(n)
1229 .k(k)
1230 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08001231 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001232 }
1233 }
1234 }
1235
1236 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8_subtile) {
1237 TEST_REQUIRES_ARM_NEON;
1238 for (uint32_t n = 9; n < 16; n++) {
1239 for (size_t k = 1; k <= 80; k += 17) {
1240 for (uint32_t m = 1; m <= 1; m++) {
1241 GemmMicrokernelTester()
1242 .mr(1)
1243 .nr(8)
1244 .kr(2)
1245 .sr(1)
1246 .m(m)
1247 .n(n)
1248 .k(k)
1249 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001250 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001251 }
1252 }
1253 }
1254 }
1255
1256 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8) {
1257 TEST_REQUIRES_ARM_NEON;
1258 for (uint32_t n = 16; n <= 24; n += 8) {
1259 for (size_t k = 1; k <= 80; k += 17) {
1260 GemmMicrokernelTester()
1261 .mr(1)
1262 .nr(8)
1263 .kr(2)
1264 .sr(1)
1265 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001266 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -08001267 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001268 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001269 }
1270 }
1271 }
1272
1273 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8_strided_cn) {
1274 TEST_REQUIRES_ARM_NEON;
1275 for (uint32_t n = 16; n <= 24; n += 8) {
1276 for (size_t k = 1; k <= 80; k += 17) {
1277 GemmMicrokernelTester()
1278 .mr(1)
1279 .nr(8)
1280 .kr(2)
1281 .sr(1)
1282 .m(1)
1283 .n(n)
1284 .k(k)
1285 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001286 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001287 }
1288 }
1289 }
1290
1291 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8_strided_a) {
1292 TEST_REQUIRES_ARM_NEON;
1293 for (uint32_t n = 16; n <= 24; n += 8) {
1294 for (size_t k = 1; k <= 80; k += 17) {
1295 GemmMicrokernelTester()
1296 .mr(1)
1297 .nr(8)
1298 .kr(2)
1299 .sr(1)
1300 .m(1)
1301 .n(n)
1302 .k(k)
1303 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08001304 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001305 }
1306 }
1307 }
1308
1309 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8_subtile) {
1310 TEST_REQUIRES_ARM_NEON;
1311 for (uint32_t n = 16; n <= 24; n += 8) {
1312 for (size_t k = 1; k <= 80; k += 17) {
1313 for (uint32_t m = 1; m <= 1; m++) {
1314 GemmMicrokernelTester()
1315 .mr(1)
1316 .nr(8)
1317 .kr(2)
1318 .sr(1)
1319 .m(m)
1320 .n(n)
1321 .k(k)
1322 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001323 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001324 }
1325 }
1326 }
1327 }
1328
1329 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, strided_cm_subtile) {
1330 TEST_REQUIRES_ARM_NEON;
1331 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001332 for (uint32_t n = 1; n <= 8; n++) {
1333 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -08001334 GemmMicrokernelTester()
1335 .mr(1)
1336 .nr(8)
1337 .kr(2)
1338 .sr(1)
1339 .m(m)
1340 .n(n)
1341 .k(k)
1342 .cm_stride(11)
1343 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001344 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001345 }
1346 }
1347 }
1348 }
1349
1350 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, qmin) {
1351 TEST_REQUIRES_ARM_NEON;
1352 GemmMicrokernelTester()
1353 .mr(1)
1354 .nr(8)
1355 .kr(2)
1356 .sr(1)
1357 .m(1)
1358 .n(8)
1359 .k(16)
1360 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001361 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001362 }
1363
1364 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, qmax) {
1365 TEST_REQUIRES_ARM_NEON;
1366 GemmMicrokernelTester()
1367 .mr(1)
1368 .nr(8)
1369 .kr(2)
1370 .sr(1)
1371 .m(1)
1372 .n(8)
1373 .k(16)
1374 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001375 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001376 }
1377
1378 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, strided_cm) {
1379 TEST_REQUIRES_ARM_NEON;
1380 GemmMicrokernelTester()
1381 .mr(1)
1382 .nr(8)
1383 .kr(2)
1384 .sr(1)
1385 .m(1)
1386 .n(8)
1387 .k(16)
1388 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001389 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001390 }
1391#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1392
1393
1394#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard15eec022021-11-17 13:26:20 -08001395 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_eq_16) {
1396 TEST_REQUIRES_ARM_NEON_V8;
1397 GemmMicrokernelTester()
1398 .mr(1)
1399 .nr(8)
1400 .kr(2)
1401 .sr(1)
1402 .m(1)
1403 .n(8)
1404 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08001405 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001406 }
1407
1408 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, strided_cn) {
1409 TEST_REQUIRES_ARM_NEON_V8;
1410 GemmMicrokernelTester()
1411 .mr(1)
1412 .nr(8)
1413 .kr(2)
1414 .sr(1)
1415 .m(1)
1416 .n(8)
1417 .k(16)
1418 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001419 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001420 }
1421
1422 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_eq_16_strided_a) {
1423 TEST_REQUIRES_ARM_NEON_V8;
1424 GemmMicrokernelTester()
1425 .mr(1)
1426 .nr(8)
1427 .kr(2)
1428 .sr(1)
1429 .m(1)
1430 .n(8)
1431 .k(16)
1432 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001433 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001434 }
1435
1436 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_eq_16_subtile) {
1437 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001438 for (uint32_t n = 1; n <= 8; n++) {
1439 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -08001440 GemmMicrokernelTester()
1441 .mr(1)
1442 .nr(8)
1443 .kr(2)
1444 .sr(1)
1445 .m(m)
1446 .n(n)
1447 .k(16)
1448 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001449 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001450 }
1451 }
1452 }
1453
1454 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_eq_16_subtile_m) {
1455 TEST_REQUIRES_ARM_NEON_V8;
1456 for (uint32_t m = 1; m <= 1; m++) {
1457 GemmMicrokernelTester()
1458 .mr(1)
1459 .nr(8)
1460 .kr(2)
1461 .sr(1)
1462 .m(m)
1463 .n(8)
1464 .k(16)
1465 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001466 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001467 }
1468 }
1469
1470 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_eq_16_subtile_n) {
1471 TEST_REQUIRES_ARM_NEON_V8;
1472 for (uint32_t n = 1; n <= 8; n++) {
1473 GemmMicrokernelTester()
1474 .mr(1)
1475 .nr(8)
1476 .kr(2)
1477 .sr(1)
1478 .m(1)
1479 .n(n)
1480 .k(16)
1481 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001482 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001483 }
1484 }
1485
1486 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_lt_16) {
1487 TEST_REQUIRES_ARM_NEON_V8;
1488 for (size_t k = 1; k < 16; k++) {
1489 GemmMicrokernelTester()
1490 .mr(1)
1491 .nr(8)
1492 .kr(2)
1493 .sr(1)
1494 .m(1)
1495 .n(8)
1496 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001497 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001498 }
1499 }
1500
1501 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_lt_16_strided_a) {
1502 TEST_REQUIRES_ARM_NEON_V8;
1503 for (size_t k = 1; k < 16; k++) {
1504 GemmMicrokernelTester()
1505 .mr(1)
1506 .nr(8)
1507 .kr(2)
1508 .sr(1)
1509 .m(1)
1510 .n(8)
1511 .k(k)
1512 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001513 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001514 }
1515 }
1516
1517 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_lt_16_subtile) {
1518 TEST_REQUIRES_ARM_NEON_V8;
1519 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001520 for (uint32_t n = 1; n <= 8; n++) {
1521 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -08001522 GemmMicrokernelTester()
1523 .mr(1)
1524 .nr(8)
1525 .kr(2)
1526 .sr(1)
1527 .m(m)
1528 .n(n)
1529 .k(k)
1530 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001531 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001532 }
1533 }
1534 }
1535 }
1536
1537 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_gt_16) {
1538 TEST_REQUIRES_ARM_NEON_V8;
1539 for (size_t k = 17; k < 32; k++) {
1540 GemmMicrokernelTester()
1541 .mr(1)
1542 .nr(8)
1543 .kr(2)
1544 .sr(1)
1545 .m(1)
1546 .n(8)
1547 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001548 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001549 }
1550 }
1551
1552 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_gt_16_strided_a) {
1553 TEST_REQUIRES_ARM_NEON_V8;
1554 for (size_t k = 17; k < 32; k++) {
1555 GemmMicrokernelTester()
1556 .mr(1)
1557 .nr(8)
1558 .kr(2)
1559 .sr(1)
1560 .m(1)
1561 .n(8)
1562 .k(k)
1563 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08001564 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001565 }
1566 }
1567
1568 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_gt_16_subtile) {
1569 TEST_REQUIRES_ARM_NEON_V8;
1570 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001571 for (uint32_t n = 1; n <= 8; n++) {
1572 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -08001573 GemmMicrokernelTester()
1574 .mr(1)
1575 .nr(8)
1576 .kr(2)
1577 .sr(1)
1578 .m(m)
1579 .n(n)
1580 .k(k)
1581 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001582 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001583 }
1584 }
1585 }
1586 }
1587
1588 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_div_16) {
1589 TEST_REQUIRES_ARM_NEON_V8;
1590 for (size_t k = 32; k <= 160; k += 16) {
1591 GemmMicrokernelTester()
1592 .mr(1)
1593 .nr(8)
1594 .kr(2)
1595 .sr(1)
1596 .m(1)
1597 .n(8)
1598 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001599 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001600 }
1601 }
1602
1603 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_div_16_strided_a) {
1604 TEST_REQUIRES_ARM_NEON_V8;
1605 for (size_t k = 32; k <= 160; k += 16) {
1606 GemmMicrokernelTester()
1607 .mr(1)
1608 .nr(8)
1609 .kr(2)
1610 .sr(1)
1611 .m(1)
1612 .n(8)
1613 .k(k)
1614 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08001615 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001616 }
1617 }
1618
1619 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_div_16_subtile) {
1620 TEST_REQUIRES_ARM_NEON_V8;
1621 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001622 for (uint32_t n = 1; n <= 8; n++) {
1623 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -08001624 GemmMicrokernelTester()
1625 .mr(1)
1626 .nr(8)
1627 .kr(2)
1628 .sr(1)
1629 .m(m)
1630 .n(n)
1631 .k(k)
1632 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001633 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001634 }
1635 }
1636 }
1637 }
1638
1639 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_gt_8) {
1640 TEST_REQUIRES_ARM_NEON_V8;
1641 for (uint32_t n = 9; n < 16; n++) {
1642 for (size_t k = 1; k <= 80; k += 17) {
1643 GemmMicrokernelTester()
1644 .mr(1)
1645 .nr(8)
1646 .kr(2)
1647 .sr(1)
1648 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001649 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -08001650 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001651 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001652 }
1653 }
1654 }
1655
1656 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_gt_8_strided_cn) {
1657 TEST_REQUIRES_ARM_NEON_V8;
1658 for (uint32_t n = 9; n < 16; n++) {
1659 for (size_t k = 1; k <= 80; k += 17) {
1660 GemmMicrokernelTester()
1661 .mr(1)
1662 .nr(8)
1663 .kr(2)
1664 .sr(1)
1665 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001666 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -08001667 .k(k)
1668 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001669 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001670 }
1671 }
1672 }
1673
1674 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_gt_8_strided_a) {
1675 TEST_REQUIRES_ARM_NEON_V8;
1676 for (uint32_t n = 9; n < 16; n++) {
1677 for (size_t k = 1; k <= 80; k += 17) {
1678 GemmMicrokernelTester()
1679 .mr(1)
1680 .nr(8)
1681 .kr(2)
1682 .sr(1)
1683 .m(1)
1684 .n(n)
1685 .k(k)
1686 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08001687 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001688 }
1689 }
1690 }
1691
1692 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_gt_8_subtile) {
1693 TEST_REQUIRES_ARM_NEON_V8;
1694 for (uint32_t n = 9; n < 16; n++) {
1695 for (size_t k = 1; k <= 80; k += 17) {
1696 for (uint32_t m = 1; m <= 1; m++) {
1697 GemmMicrokernelTester()
1698 .mr(1)
1699 .nr(8)
1700 .kr(2)
1701 .sr(1)
1702 .m(m)
1703 .n(n)
1704 .k(k)
1705 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001706 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001707 }
1708 }
1709 }
1710 }
1711
1712 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_div_8) {
1713 TEST_REQUIRES_ARM_NEON_V8;
1714 for (uint32_t n = 16; n <= 24; n += 8) {
1715 for (size_t k = 1; k <= 80; k += 17) {
1716 GemmMicrokernelTester()
1717 .mr(1)
1718 .nr(8)
1719 .kr(2)
1720 .sr(1)
1721 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001722 .n(n)
Frank Barchard15eec022021-11-17 13:26:20 -08001723 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001724 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001725 }
1726 }
1727 }
1728
1729 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_div_8_strided_cn) {
1730 TEST_REQUIRES_ARM_NEON_V8;
1731 for (uint32_t n = 16; n <= 24; n += 8) {
1732 for (size_t k = 1; k <= 80; k += 17) {
1733 GemmMicrokernelTester()
1734 .mr(1)
1735 .nr(8)
1736 .kr(2)
1737 .sr(1)
1738 .m(1)
1739 .n(n)
1740 .k(k)
1741 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001742 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001743 }
1744 }
1745 }
1746
1747 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_div_8_strided_a) {
1748 TEST_REQUIRES_ARM_NEON_V8;
1749 for (uint32_t n = 16; n <= 24; n += 8) {
1750 for (size_t k = 1; k <= 80; k += 17) {
1751 GemmMicrokernelTester()
1752 .mr(1)
1753 .nr(8)
1754 .kr(2)
1755 .sr(1)
1756 .m(1)
1757 .n(n)
1758 .k(k)
1759 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08001760 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001761 }
1762 }
1763 }
1764
1765 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_div_8_subtile) {
1766 TEST_REQUIRES_ARM_NEON_V8;
1767 for (uint32_t n = 16; n <= 24; n += 8) {
1768 for (size_t k = 1; k <= 80; k += 17) {
1769 for (uint32_t m = 1; m <= 1; m++) {
1770 GemmMicrokernelTester()
1771 .mr(1)
1772 .nr(8)
1773 .kr(2)
1774 .sr(1)
1775 .m(m)
1776 .n(n)
1777 .k(k)
1778 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001779 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001780 }
1781 }
1782 }
1783 }
1784
1785 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, strided_cm_subtile) {
1786 TEST_REQUIRES_ARM_NEON_V8;
1787 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001788 for (uint32_t n = 1; n <= 8; n++) {
1789 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard15eec022021-11-17 13:26:20 -08001790 GemmMicrokernelTester()
1791 .mr(1)
1792 .nr(8)
1793 .kr(2)
1794 .sr(1)
1795 .m(m)
1796 .n(n)
1797 .k(k)
1798 .cm_stride(11)
1799 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001800 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001801 }
1802 }
1803 }
1804 }
1805
1806 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, qmin) {
1807 TEST_REQUIRES_ARM_NEON_V8;
1808 GemmMicrokernelTester()
1809 .mr(1)
1810 .nr(8)
1811 .kr(2)
1812 .sr(1)
1813 .m(1)
1814 .n(8)
1815 .k(16)
1816 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001817 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001818 }
1819
1820 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, qmax) {
1821 TEST_REQUIRES_ARM_NEON_V8;
1822 GemmMicrokernelTester()
1823 .mr(1)
1824 .nr(8)
1825 .kr(2)
1826 .sr(1)
1827 .m(1)
1828 .n(8)
1829 .k(16)
1830 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001831 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001832 }
1833
1834 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, strided_cm) {
1835 TEST_REQUIRES_ARM_NEON_V8;
1836 GemmMicrokernelTester()
1837 .mr(1)
1838 .nr(8)
1839 .kr(2)
1840 .sr(1)
1841 .m(1)
1842 .n(8)
1843 .k(16)
1844 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001845 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard15eec022021-11-17 13:26:20 -08001846 }
1847#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1848
1849
1850#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard42f5c502021-11-16 10:04:21 -08001851 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16) {
1852 TEST_REQUIRES_ARM_NEON;
1853 GemmMicrokernelTester()
1854 .mr(1)
1855 .nr(8)
1856 .kr(2)
1857 .sr(1)
1858 .m(1)
1859 .n(8)
1860 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08001861 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08001862 }
1863
1864 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, strided_cn) {
1865 TEST_REQUIRES_ARM_NEON;
1866 GemmMicrokernelTester()
1867 .mr(1)
1868 .nr(8)
1869 .kr(2)
1870 .sr(1)
1871 .m(1)
1872 .n(8)
1873 .k(16)
1874 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001875 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08001876 }
1877
1878 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16_strided_a) {
1879 TEST_REQUIRES_ARM_NEON;
1880 GemmMicrokernelTester()
1881 .mr(1)
1882 .nr(8)
1883 .kr(2)
1884 .sr(1)
1885 .m(1)
1886 .n(8)
1887 .k(16)
1888 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001889 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08001890 }
1891
1892 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
1893 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001894 for (uint32_t n = 1; n <= 8; n++) {
1895 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08001896 GemmMicrokernelTester()
1897 .mr(1)
1898 .nr(8)
1899 .kr(2)
1900 .sr(1)
1901 .m(m)
1902 .n(n)
1903 .k(16)
1904 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001905 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08001906 }
1907 }
1908 }
1909
1910 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
1911 TEST_REQUIRES_ARM_NEON;
1912 for (uint32_t m = 1; m <= 1; m++) {
1913 GemmMicrokernelTester()
1914 .mr(1)
1915 .nr(8)
1916 .kr(2)
1917 .sr(1)
1918 .m(m)
1919 .n(8)
1920 .k(16)
1921 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001922 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08001923 }
1924 }
1925
1926 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
1927 TEST_REQUIRES_ARM_NEON;
1928 for (uint32_t n = 1; n <= 8; n++) {
1929 GemmMicrokernelTester()
1930 .mr(1)
1931 .nr(8)
1932 .kr(2)
1933 .sr(1)
1934 .m(1)
1935 .n(n)
1936 .k(16)
1937 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001938 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08001939 }
1940 }
1941
1942 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_lt_16) {
1943 TEST_REQUIRES_ARM_NEON;
1944 for (size_t k = 1; k < 16; k++) {
1945 GemmMicrokernelTester()
1946 .mr(1)
1947 .nr(8)
1948 .kr(2)
1949 .sr(1)
1950 .m(1)
1951 .n(8)
1952 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001953 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08001954 }
1955 }
1956
1957 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_lt_16_strided_a) {
1958 TEST_REQUIRES_ARM_NEON;
1959 for (size_t k = 1; k < 16; k++) {
1960 GemmMicrokernelTester()
1961 .mr(1)
1962 .nr(8)
1963 .kr(2)
1964 .sr(1)
1965 .m(1)
1966 .n(8)
1967 .k(k)
1968 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001969 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08001970 }
1971 }
1972
1973 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
1974 TEST_REQUIRES_ARM_NEON;
1975 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001976 for (uint32_t n = 1; n <= 8; n++) {
1977 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08001978 GemmMicrokernelTester()
1979 .mr(1)
1980 .nr(8)
1981 .kr(2)
1982 .sr(1)
1983 .m(m)
1984 .n(n)
1985 .k(k)
1986 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001987 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08001988 }
1989 }
1990 }
1991 }
1992
1993 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_gt_16) {
1994 TEST_REQUIRES_ARM_NEON;
1995 for (size_t k = 17; k < 32; k++) {
1996 GemmMicrokernelTester()
1997 .mr(1)
1998 .nr(8)
1999 .kr(2)
2000 .sr(1)
2001 .m(1)
2002 .n(8)
2003 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002004 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002005 }
2006 }
2007
2008 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_gt_16_strided_a) {
2009 TEST_REQUIRES_ARM_NEON;
2010 for (size_t k = 17; k < 32; k++) {
2011 GemmMicrokernelTester()
2012 .mr(1)
2013 .nr(8)
2014 .kr(2)
2015 .sr(1)
2016 .m(1)
2017 .n(8)
2018 .k(k)
2019 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08002020 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002021 }
2022 }
2023
2024 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
2025 TEST_REQUIRES_ARM_NEON;
2026 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002027 for (uint32_t n = 1; n <= 8; n++) {
2028 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08002029 GemmMicrokernelTester()
2030 .mr(1)
2031 .nr(8)
2032 .kr(2)
2033 .sr(1)
2034 .m(m)
2035 .n(n)
2036 .k(k)
2037 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002038 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002039 }
2040 }
2041 }
2042 }
2043
2044 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_div_16) {
2045 TEST_REQUIRES_ARM_NEON;
2046 for (size_t k = 32; k <= 160; k += 16) {
2047 GemmMicrokernelTester()
2048 .mr(1)
2049 .nr(8)
2050 .kr(2)
2051 .sr(1)
2052 .m(1)
2053 .n(8)
2054 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002055 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002056 }
2057 }
2058
2059 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_div_16_strided_a) {
2060 TEST_REQUIRES_ARM_NEON;
2061 for (size_t k = 32; k <= 160; k += 16) {
2062 GemmMicrokernelTester()
2063 .mr(1)
2064 .nr(8)
2065 .kr(2)
2066 .sr(1)
2067 .m(1)
2068 .n(8)
2069 .k(k)
2070 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08002071 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002072 }
2073 }
2074
2075 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_div_16_subtile) {
2076 TEST_REQUIRES_ARM_NEON;
2077 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002078 for (uint32_t n = 1; n <= 8; n++) {
2079 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08002080 GemmMicrokernelTester()
2081 .mr(1)
2082 .nr(8)
2083 .kr(2)
2084 .sr(1)
2085 .m(m)
2086 .n(n)
2087 .k(k)
2088 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002089 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002090 }
2091 }
2092 }
2093 }
2094
2095 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8) {
2096 TEST_REQUIRES_ARM_NEON;
2097 for (uint32_t n = 9; n < 16; n++) {
2098 for (size_t k = 1; k <= 80; k += 17) {
2099 GemmMicrokernelTester()
2100 .mr(1)
2101 .nr(8)
2102 .kr(2)
2103 .sr(1)
2104 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002105 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08002106 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002107 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002108 }
2109 }
2110 }
2111
2112 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8_strided_cn) {
2113 TEST_REQUIRES_ARM_NEON;
2114 for (uint32_t n = 9; n < 16; n++) {
2115 for (size_t k = 1; k <= 80; k += 17) {
2116 GemmMicrokernelTester()
2117 .mr(1)
2118 .nr(8)
2119 .kr(2)
2120 .sr(1)
2121 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002122 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08002123 .k(k)
2124 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002125 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002126 }
2127 }
2128 }
2129
2130 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8_strided_a) {
2131 TEST_REQUIRES_ARM_NEON;
2132 for (uint32_t n = 9; n < 16; n++) {
2133 for (size_t k = 1; k <= 80; k += 17) {
2134 GemmMicrokernelTester()
2135 .mr(1)
2136 .nr(8)
2137 .kr(2)
2138 .sr(1)
2139 .m(1)
2140 .n(n)
2141 .k(k)
2142 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08002143 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002144 }
2145 }
2146 }
2147
2148 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8_subtile) {
2149 TEST_REQUIRES_ARM_NEON;
2150 for (uint32_t n = 9; n < 16; n++) {
2151 for (size_t k = 1; k <= 80; k += 17) {
2152 for (uint32_t m = 1; m <= 1; m++) {
2153 GemmMicrokernelTester()
2154 .mr(1)
2155 .nr(8)
2156 .kr(2)
2157 .sr(1)
2158 .m(m)
2159 .n(n)
2160 .k(k)
2161 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002162 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002163 }
2164 }
2165 }
2166 }
2167
2168 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8) {
2169 TEST_REQUIRES_ARM_NEON;
2170 for (uint32_t n = 16; n <= 24; n += 8) {
2171 for (size_t k = 1; k <= 80; k += 17) {
2172 GemmMicrokernelTester()
2173 .mr(1)
2174 .nr(8)
2175 .kr(2)
2176 .sr(1)
2177 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002178 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08002179 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002180 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002181 }
2182 }
2183 }
2184
2185 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8_strided_cn) {
2186 TEST_REQUIRES_ARM_NEON;
2187 for (uint32_t n = 16; n <= 24; n += 8) {
2188 for (size_t k = 1; k <= 80; k += 17) {
2189 GemmMicrokernelTester()
2190 .mr(1)
2191 .nr(8)
2192 .kr(2)
2193 .sr(1)
2194 .m(1)
2195 .n(n)
2196 .k(k)
2197 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002198 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002199 }
2200 }
2201 }
2202
2203 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8_strided_a) {
2204 TEST_REQUIRES_ARM_NEON;
2205 for (uint32_t n = 16; n <= 24; n += 8) {
2206 for (size_t k = 1; k <= 80; k += 17) {
2207 GemmMicrokernelTester()
2208 .mr(1)
2209 .nr(8)
2210 .kr(2)
2211 .sr(1)
2212 .m(1)
2213 .n(n)
2214 .k(k)
2215 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08002216 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002217 }
2218 }
2219 }
2220
2221 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8_subtile) {
2222 TEST_REQUIRES_ARM_NEON;
2223 for (uint32_t n = 16; n <= 24; n += 8) {
2224 for (size_t k = 1; k <= 80; k += 17) {
2225 for (uint32_t m = 1; m <= 1; m++) {
2226 GemmMicrokernelTester()
2227 .mr(1)
2228 .nr(8)
2229 .kr(2)
2230 .sr(1)
2231 .m(m)
2232 .n(n)
2233 .k(k)
2234 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002235 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002236 }
2237 }
2238 }
2239 }
2240
2241 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, strided_cm_subtile) {
2242 TEST_REQUIRES_ARM_NEON;
2243 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002244 for (uint32_t n = 1; n <= 8; n++) {
2245 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08002246 GemmMicrokernelTester()
2247 .mr(1)
2248 .nr(8)
2249 .kr(2)
2250 .sr(1)
2251 .m(m)
2252 .n(n)
2253 .k(k)
2254 .cm_stride(11)
2255 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002256 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002257 }
2258 }
2259 }
2260 }
2261
2262 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, qmin) {
2263 TEST_REQUIRES_ARM_NEON;
2264 GemmMicrokernelTester()
2265 .mr(1)
2266 .nr(8)
2267 .kr(2)
2268 .sr(1)
2269 .m(1)
2270 .n(8)
2271 .k(16)
2272 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002273 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002274 }
2275
2276 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, qmax) {
2277 TEST_REQUIRES_ARM_NEON;
2278 GemmMicrokernelTester()
2279 .mr(1)
2280 .nr(8)
2281 .kr(2)
2282 .sr(1)
2283 .m(1)
2284 .n(8)
2285 .k(16)
2286 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002287 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002288 }
2289
2290 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, strided_cm) {
2291 TEST_REQUIRES_ARM_NEON;
2292 GemmMicrokernelTester()
2293 .mr(1)
2294 .nr(8)
2295 .kr(2)
2296 .sr(1)
2297 .m(1)
2298 .n(8)
2299 .k(16)
2300 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002301 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002302 }
2303#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2304
2305
2306#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard42f5c502021-11-16 10:04:21 -08002307 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_eq_16) {
2308 TEST_REQUIRES_ARM_NEON_V8;
2309 GemmMicrokernelTester()
2310 .mr(1)
2311 .nr(8)
2312 .kr(2)
2313 .sr(1)
2314 .m(1)
2315 .n(8)
2316 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08002317 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002318 }
2319
2320 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, strided_cn) {
2321 TEST_REQUIRES_ARM_NEON_V8;
2322 GemmMicrokernelTester()
2323 .mr(1)
2324 .nr(8)
2325 .kr(2)
2326 .sr(1)
2327 .m(1)
2328 .n(8)
2329 .k(16)
2330 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002331 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002332 }
2333
2334 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_eq_16_strided_a) {
2335 TEST_REQUIRES_ARM_NEON_V8;
2336 GemmMicrokernelTester()
2337 .mr(1)
2338 .nr(8)
2339 .kr(2)
2340 .sr(1)
2341 .m(1)
2342 .n(8)
2343 .k(16)
2344 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002345 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002346 }
2347
2348 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_eq_16_subtile) {
2349 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002350 for (uint32_t n = 1; n <= 8; n++) {
2351 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08002352 GemmMicrokernelTester()
2353 .mr(1)
2354 .nr(8)
2355 .kr(2)
2356 .sr(1)
2357 .m(m)
2358 .n(n)
2359 .k(16)
2360 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002361 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002362 }
2363 }
2364 }
2365
2366 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_eq_16_subtile_m) {
2367 TEST_REQUIRES_ARM_NEON_V8;
2368 for (uint32_t m = 1; m <= 1; m++) {
2369 GemmMicrokernelTester()
2370 .mr(1)
2371 .nr(8)
2372 .kr(2)
2373 .sr(1)
2374 .m(m)
2375 .n(8)
2376 .k(16)
2377 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002378 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002379 }
2380 }
2381
2382 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_eq_16_subtile_n) {
2383 TEST_REQUIRES_ARM_NEON_V8;
2384 for (uint32_t n = 1; n <= 8; n++) {
2385 GemmMicrokernelTester()
2386 .mr(1)
2387 .nr(8)
2388 .kr(2)
2389 .sr(1)
2390 .m(1)
2391 .n(n)
2392 .k(16)
2393 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002394 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002395 }
2396 }
2397
2398 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_lt_16) {
2399 TEST_REQUIRES_ARM_NEON_V8;
2400 for (size_t k = 1; k < 16; k++) {
2401 GemmMicrokernelTester()
2402 .mr(1)
2403 .nr(8)
2404 .kr(2)
2405 .sr(1)
2406 .m(1)
2407 .n(8)
2408 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002409 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002410 }
2411 }
2412
2413 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_lt_16_strided_a) {
2414 TEST_REQUIRES_ARM_NEON_V8;
2415 for (size_t k = 1; k < 16; k++) {
2416 GemmMicrokernelTester()
2417 .mr(1)
2418 .nr(8)
2419 .kr(2)
2420 .sr(1)
2421 .m(1)
2422 .n(8)
2423 .k(k)
2424 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002425 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002426 }
2427 }
2428
2429 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_lt_16_subtile) {
2430 TEST_REQUIRES_ARM_NEON_V8;
2431 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002432 for (uint32_t n = 1; n <= 8; n++) {
2433 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08002434 GemmMicrokernelTester()
2435 .mr(1)
2436 .nr(8)
2437 .kr(2)
2438 .sr(1)
2439 .m(m)
2440 .n(n)
2441 .k(k)
2442 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002443 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002444 }
2445 }
2446 }
2447 }
2448
2449 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_gt_16) {
2450 TEST_REQUIRES_ARM_NEON_V8;
2451 for (size_t k = 17; k < 32; k++) {
2452 GemmMicrokernelTester()
2453 .mr(1)
2454 .nr(8)
2455 .kr(2)
2456 .sr(1)
2457 .m(1)
2458 .n(8)
2459 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002460 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002461 }
2462 }
2463
2464 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_gt_16_strided_a) {
2465 TEST_REQUIRES_ARM_NEON_V8;
2466 for (size_t k = 17; k < 32; k++) {
2467 GemmMicrokernelTester()
2468 .mr(1)
2469 .nr(8)
2470 .kr(2)
2471 .sr(1)
2472 .m(1)
2473 .n(8)
2474 .k(k)
2475 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08002476 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002477 }
2478 }
2479
2480 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_gt_16_subtile) {
2481 TEST_REQUIRES_ARM_NEON_V8;
2482 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002483 for (uint32_t n = 1; n <= 8; n++) {
2484 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08002485 GemmMicrokernelTester()
2486 .mr(1)
2487 .nr(8)
2488 .kr(2)
2489 .sr(1)
2490 .m(m)
2491 .n(n)
2492 .k(k)
2493 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002494 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002495 }
2496 }
2497 }
2498 }
2499
2500 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_div_16) {
2501 TEST_REQUIRES_ARM_NEON_V8;
2502 for (size_t k = 32; k <= 160; k += 16) {
2503 GemmMicrokernelTester()
2504 .mr(1)
2505 .nr(8)
2506 .kr(2)
2507 .sr(1)
2508 .m(1)
2509 .n(8)
2510 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002511 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002512 }
2513 }
2514
2515 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_div_16_strided_a) {
2516 TEST_REQUIRES_ARM_NEON_V8;
2517 for (size_t k = 32; k <= 160; k += 16) {
2518 GemmMicrokernelTester()
2519 .mr(1)
2520 .nr(8)
2521 .kr(2)
2522 .sr(1)
2523 .m(1)
2524 .n(8)
2525 .k(k)
2526 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08002527 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002528 }
2529 }
2530
2531 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_div_16_subtile) {
2532 TEST_REQUIRES_ARM_NEON_V8;
2533 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002534 for (uint32_t n = 1; n <= 8; n++) {
2535 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08002536 GemmMicrokernelTester()
2537 .mr(1)
2538 .nr(8)
2539 .kr(2)
2540 .sr(1)
2541 .m(m)
2542 .n(n)
2543 .k(k)
2544 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002545 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002546 }
2547 }
2548 }
2549 }
2550
2551 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_gt_8) {
2552 TEST_REQUIRES_ARM_NEON_V8;
2553 for (uint32_t n = 9; n < 16; n++) {
2554 for (size_t k = 1; k <= 80; k += 17) {
2555 GemmMicrokernelTester()
2556 .mr(1)
2557 .nr(8)
2558 .kr(2)
2559 .sr(1)
2560 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002561 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08002562 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002563 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002564 }
2565 }
2566 }
2567
2568 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_gt_8_strided_cn) {
2569 TEST_REQUIRES_ARM_NEON_V8;
2570 for (uint32_t n = 9; n < 16; n++) {
2571 for (size_t k = 1; k <= 80; k += 17) {
2572 GemmMicrokernelTester()
2573 .mr(1)
2574 .nr(8)
2575 .kr(2)
2576 .sr(1)
2577 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002578 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08002579 .k(k)
2580 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002581 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002582 }
2583 }
2584 }
2585
2586 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_gt_8_strided_a) {
2587 TEST_REQUIRES_ARM_NEON_V8;
2588 for (uint32_t n = 9; n < 16; n++) {
2589 for (size_t k = 1; k <= 80; k += 17) {
2590 GemmMicrokernelTester()
2591 .mr(1)
2592 .nr(8)
2593 .kr(2)
2594 .sr(1)
2595 .m(1)
2596 .n(n)
2597 .k(k)
2598 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08002599 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002600 }
2601 }
2602 }
2603
2604 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_gt_8_subtile) {
2605 TEST_REQUIRES_ARM_NEON_V8;
2606 for (uint32_t n = 9; n < 16; n++) {
2607 for (size_t k = 1; k <= 80; k += 17) {
2608 for (uint32_t m = 1; m <= 1; m++) {
2609 GemmMicrokernelTester()
2610 .mr(1)
2611 .nr(8)
2612 .kr(2)
2613 .sr(1)
2614 .m(m)
2615 .n(n)
2616 .k(k)
2617 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002618 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002619 }
2620 }
2621 }
2622 }
2623
2624 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_div_8) {
2625 TEST_REQUIRES_ARM_NEON_V8;
2626 for (uint32_t n = 16; n <= 24; n += 8) {
2627 for (size_t k = 1; k <= 80; k += 17) {
2628 GemmMicrokernelTester()
2629 .mr(1)
2630 .nr(8)
2631 .kr(2)
2632 .sr(1)
2633 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002634 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08002635 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002636 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002637 }
2638 }
2639 }
2640
2641 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_div_8_strided_cn) {
2642 TEST_REQUIRES_ARM_NEON_V8;
2643 for (uint32_t n = 16; n <= 24; n += 8) {
2644 for (size_t k = 1; k <= 80; k += 17) {
2645 GemmMicrokernelTester()
2646 .mr(1)
2647 .nr(8)
2648 .kr(2)
2649 .sr(1)
2650 .m(1)
2651 .n(n)
2652 .k(k)
2653 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002654 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002655 }
2656 }
2657 }
2658
2659 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_div_8_strided_a) {
2660 TEST_REQUIRES_ARM_NEON_V8;
2661 for (uint32_t n = 16; n <= 24; n += 8) {
2662 for (size_t k = 1; k <= 80; k += 17) {
2663 GemmMicrokernelTester()
2664 .mr(1)
2665 .nr(8)
2666 .kr(2)
2667 .sr(1)
2668 .m(1)
2669 .n(n)
2670 .k(k)
2671 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08002672 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002673 }
2674 }
2675 }
2676
2677 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_div_8_subtile) {
2678 TEST_REQUIRES_ARM_NEON_V8;
2679 for (uint32_t n = 16; n <= 24; n += 8) {
2680 for (size_t k = 1; k <= 80; k += 17) {
2681 for (uint32_t m = 1; m <= 1; m++) {
2682 GemmMicrokernelTester()
2683 .mr(1)
2684 .nr(8)
2685 .kr(2)
2686 .sr(1)
2687 .m(m)
2688 .n(n)
2689 .k(k)
2690 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002691 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002692 }
2693 }
2694 }
2695 }
2696
2697 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, strided_cm_subtile) {
2698 TEST_REQUIRES_ARM_NEON_V8;
2699 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002700 for (uint32_t n = 1; n <= 8; n++) {
2701 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08002702 GemmMicrokernelTester()
2703 .mr(1)
2704 .nr(8)
2705 .kr(2)
2706 .sr(1)
2707 .m(m)
2708 .n(n)
2709 .k(k)
2710 .cm_stride(11)
2711 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002712 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002713 }
2714 }
2715 }
2716 }
2717
2718 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, qmin) {
2719 TEST_REQUIRES_ARM_NEON_V8;
2720 GemmMicrokernelTester()
2721 .mr(1)
2722 .nr(8)
2723 .kr(2)
2724 .sr(1)
2725 .m(1)
2726 .n(8)
2727 .k(16)
2728 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002729 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002730 }
2731
2732 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, qmax) {
2733 TEST_REQUIRES_ARM_NEON_V8;
2734 GemmMicrokernelTester()
2735 .mr(1)
2736 .nr(8)
2737 .kr(2)
2738 .sr(1)
2739 .m(1)
2740 .n(8)
2741 .k(16)
2742 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002743 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002744 }
2745
2746 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, strided_cm) {
2747 TEST_REQUIRES_ARM_NEON_V8;
2748 GemmMicrokernelTester()
2749 .mr(1)
2750 .nr(8)
2751 .kr(2)
2752 .sr(1)
2753 .m(1)
2754 .n(8)
2755 .k(16)
2756 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002757 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard42f5c502021-11-16 10:04:21 -08002758 }
2759#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2760
2761
2762#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard64ab1b72021-11-22 10:57:40 -08002763 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_eq_16) {
2764 TEST_REQUIRES_ARM_NEON;
2765 GemmMicrokernelTester()
2766 .mr(2)
2767 .nr(8)
2768 .kr(4)
2769 .sr(1)
2770 .m(2)
2771 .n(8)
2772 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08002773 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08002774 }
2775
2776 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, strided_cn) {
2777 TEST_REQUIRES_ARM_NEON;
2778 GemmMicrokernelTester()
2779 .mr(2)
2780 .nr(8)
2781 .kr(4)
2782 .sr(1)
2783 .m(2)
2784 .n(8)
2785 .k(16)
2786 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002787 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08002788 }
2789
2790 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_eq_16_strided_a) {
2791 TEST_REQUIRES_ARM_NEON;
2792 GemmMicrokernelTester()
2793 .mr(2)
2794 .nr(8)
2795 .kr(4)
2796 .sr(1)
2797 .m(2)
2798 .n(8)
2799 .k(16)
2800 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002801 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08002802 }
2803
2804 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile) {
2805 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002806 for (uint32_t n = 1; n <= 8; n++) {
2807 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08002808 GemmMicrokernelTester()
2809 .mr(2)
2810 .nr(8)
2811 .kr(4)
2812 .sr(1)
2813 .m(m)
2814 .n(n)
2815 .k(16)
2816 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002817 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08002818 }
2819 }
2820 }
2821
2822 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
2823 TEST_REQUIRES_ARM_NEON;
2824 for (uint32_t m = 1; m <= 2; m++) {
2825 GemmMicrokernelTester()
2826 .mr(2)
2827 .nr(8)
2828 .kr(4)
2829 .sr(1)
2830 .m(m)
2831 .n(8)
2832 .k(16)
2833 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002834 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08002835 }
2836 }
2837
2838 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
2839 TEST_REQUIRES_ARM_NEON;
2840 for (uint32_t n = 1; n <= 8; n++) {
2841 GemmMicrokernelTester()
2842 .mr(2)
2843 .nr(8)
2844 .kr(4)
2845 .sr(1)
2846 .m(2)
2847 .n(n)
2848 .k(16)
2849 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002850 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08002851 }
2852 }
2853
2854 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_lt_16) {
2855 TEST_REQUIRES_ARM_NEON;
2856 for (size_t k = 1; k < 16; k++) {
2857 GemmMicrokernelTester()
2858 .mr(2)
2859 .nr(8)
2860 .kr(4)
2861 .sr(1)
2862 .m(2)
2863 .n(8)
2864 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002865 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08002866 }
2867 }
2868
2869 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_lt_16_strided_a) {
2870 TEST_REQUIRES_ARM_NEON;
2871 for (size_t k = 1; k < 16; k++) {
2872 GemmMicrokernelTester()
2873 .mr(2)
2874 .nr(8)
2875 .kr(4)
2876 .sr(1)
2877 .m(2)
2878 .n(8)
2879 .k(k)
2880 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002881 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08002882 }
2883 }
2884
2885 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_lt_16_subtile) {
2886 TEST_REQUIRES_ARM_NEON;
2887 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002888 for (uint32_t n = 1; n <= 8; n++) {
2889 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08002890 GemmMicrokernelTester()
2891 .mr(2)
2892 .nr(8)
2893 .kr(4)
2894 .sr(1)
2895 .m(m)
2896 .n(n)
2897 .k(k)
2898 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002899 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08002900 }
2901 }
2902 }
2903 }
2904
2905 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_gt_16) {
2906 TEST_REQUIRES_ARM_NEON;
2907 for (size_t k = 17; k < 32; k++) {
2908 GemmMicrokernelTester()
2909 .mr(2)
2910 .nr(8)
2911 .kr(4)
2912 .sr(1)
2913 .m(2)
2914 .n(8)
2915 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002916 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08002917 }
2918 }
2919
2920 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_gt_16_strided_a) {
2921 TEST_REQUIRES_ARM_NEON;
2922 for (size_t k = 17; k < 32; k++) {
2923 GemmMicrokernelTester()
2924 .mr(2)
2925 .nr(8)
2926 .kr(4)
2927 .sr(1)
2928 .m(2)
2929 .n(8)
2930 .k(k)
2931 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08002932 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08002933 }
2934 }
2935
2936 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_gt_16_subtile) {
2937 TEST_REQUIRES_ARM_NEON;
2938 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002939 for (uint32_t n = 1; n <= 8; n++) {
2940 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08002941 GemmMicrokernelTester()
2942 .mr(2)
2943 .nr(8)
2944 .kr(4)
2945 .sr(1)
2946 .m(m)
2947 .n(n)
2948 .k(k)
2949 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002950 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08002951 }
2952 }
2953 }
2954 }
2955
2956 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_div_16) {
2957 TEST_REQUIRES_ARM_NEON;
2958 for (size_t k = 32; k <= 160; k += 16) {
2959 GemmMicrokernelTester()
2960 .mr(2)
2961 .nr(8)
2962 .kr(4)
2963 .sr(1)
2964 .m(2)
2965 .n(8)
2966 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002967 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08002968 }
2969 }
2970
2971 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_div_16_strided_a) {
2972 TEST_REQUIRES_ARM_NEON;
2973 for (size_t k = 32; k <= 160; k += 16) {
2974 GemmMicrokernelTester()
2975 .mr(2)
2976 .nr(8)
2977 .kr(4)
2978 .sr(1)
2979 .m(2)
2980 .n(8)
2981 .k(k)
2982 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08002983 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08002984 }
2985 }
2986
2987 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_div_16_subtile) {
2988 TEST_REQUIRES_ARM_NEON;
2989 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002990 for (uint32_t n = 1; n <= 8; n++) {
2991 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08002992 GemmMicrokernelTester()
2993 .mr(2)
2994 .nr(8)
2995 .kr(4)
2996 .sr(1)
2997 .m(m)
2998 .n(n)
2999 .k(k)
3000 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003001 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003002 }
3003 }
3004 }
3005 }
3006
3007 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_gt_8) {
3008 TEST_REQUIRES_ARM_NEON;
3009 for (uint32_t n = 9; n < 16; n++) {
3010 for (size_t k = 1; k <= 80; k += 17) {
3011 GemmMicrokernelTester()
3012 .mr(2)
3013 .nr(8)
3014 .kr(4)
3015 .sr(1)
3016 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003017 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -08003018 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003019 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003020 }
3021 }
3022 }
3023
3024 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
3025 TEST_REQUIRES_ARM_NEON;
3026 for (uint32_t n = 9; n < 16; n++) {
3027 for (size_t k = 1; k <= 80; k += 17) {
3028 GemmMicrokernelTester()
3029 .mr(2)
3030 .nr(8)
3031 .kr(4)
3032 .sr(1)
3033 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003034 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -08003035 .k(k)
3036 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003037 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003038 }
3039 }
3040 }
3041
3042 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_gt_8_strided_a) {
3043 TEST_REQUIRES_ARM_NEON;
3044 for (uint32_t n = 9; n < 16; n++) {
3045 for (size_t k = 1; k <= 80; k += 17) {
3046 GemmMicrokernelTester()
3047 .mr(2)
3048 .nr(8)
3049 .kr(4)
3050 .sr(1)
3051 .m(2)
3052 .n(n)
3053 .k(k)
3054 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08003055 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003056 }
3057 }
3058 }
3059
3060 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_gt_8_subtile) {
3061 TEST_REQUIRES_ARM_NEON;
3062 for (uint32_t n = 9; n < 16; n++) {
3063 for (size_t k = 1; k <= 80; k += 17) {
3064 for (uint32_t m = 1; m <= 2; m++) {
3065 GemmMicrokernelTester()
3066 .mr(2)
3067 .nr(8)
3068 .kr(4)
3069 .sr(1)
3070 .m(m)
3071 .n(n)
3072 .k(k)
3073 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003074 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003075 }
3076 }
3077 }
3078 }
3079
3080 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_div_8) {
3081 TEST_REQUIRES_ARM_NEON;
3082 for (uint32_t n = 16; n <= 24; n += 8) {
3083 for (size_t k = 1; k <= 80; k += 17) {
3084 GemmMicrokernelTester()
3085 .mr(2)
3086 .nr(8)
3087 .kr(4)
3088 .sr(1)
3089 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003090 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -08003091 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003092 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003093 }
3094 }
3095 }
3096
3097 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_div_8_strided_cn) {
3098 TEST_REQUIRES_ARM_NEON;
3099 for (uint32_t n = 16; n <= 24; n += 8) {
3100 for (size_t k = 1; k <= 80; k += 17) {
3101 GemmMicrokernelTester()
3102 .mr(2)
3103 .nr(8)
3104 .kr(4)
3105 .sr(1)
3106 .m(2)
3107 .n(n)
3108 .k(k)
3109 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003110 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003111 }
3112 }
3113 }
3114
3115 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_div_8_strided_a) {
3116 TEST_REQUIRES_ARM_NEON;
3117 for (uint32_t n = 16; n <= 24; n += 8) {
3118 for (size_t k = 1; k <= 80; k += 17) {
3119 GemmMicrokernelTester()
3120 .mr(2)
3121 .nr(8)
3122 .kr(4)
3123 .sr(1)
3124 .m(2)
3125 .n(n)
3126 .k(k)
3127 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08003128 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003129 }
3130 }
3131 }
3132
3133 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_div_8_subtile) {
3134 TEST_REQUIRES_ARM_NEON;
3135 for (uint32_t n = 16; n <= 24; n += 8) {
3136 for (size_t k = 1; k <= 80; k += 17) {
3137 for (uint32_t m = 1; m <= 2; m++) {
3138 GemmMicrokernelTester()
3139 .mr(2)
3140 .nr(8)
3141 .kr(4)
3142 .sr(1)
3143 .m(m)
3144 .n(n)
3145 .k(k)
3146 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003147 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003148 }
3149 }
3150 }
3151 }
3152
3153 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, strided_cm_subtile) {
3154 TEST_REQUIRES_ARM_NEON;
3155 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003156 for (uint32_t n = 1; n <= 8; n++) {
3157 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08003158 GemmMicrokernelTester()
3159 .mr(2)
3160 .nr(8)
3161 .kr(4)
3162 .sr(1)
3163 .m(m)
3164 .n(n)
3165 .k(k)
3166 .cm_stride(11)
3167 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003168 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003169 }
3170 }
3171 }
3172 }
3173
3174 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, qmin) {
3175 TEST_REQUIRES_ARM_NEON;
3176 GemmMicrokernelTester()
3177 .mr(2)
3178 .nr(8)
3179 .kr(4)
3180 .sr(1)
3181 .m(2)
3182 .n(8)
3183 .k(16)
3184 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003185 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003186 }
3187
3188 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, qmax) {
3189 TEST_REQUIRES_ARM_NEON;
3190 GemmMicrokernelTester()
3191 .mr(2)
3192 .nr(8)
3193 .kr(4)
3194 .sr(1)
3195 .m(2)
3196 .n(8)
3197 .k(16)
3198 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003199 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003200 }
3201
3202 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, strided_cm) {
3203 TEST_REQUIRES_ARM_NEON;
3204 GemmMicrokernelTester()
3205 .mr(2)
3206 .nr(8)
3207 .kr(4)
3208 .sr(1)
3209 .m(2)
3210 .n(8)
3211 .k(16)
3212 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003213 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003214 }
3215#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3216
3217
3218#if XNN_ARCH_ARM || XNN_ARCH_ARM64
3219 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_eq_16) {
3220 TEST_REQUIRES_ARM_NEON_V8;
3221 GemmMicrokernelTester()
3222 .mr(1)
3223 .nr(8)
3224 .kr(4)
3225 .sr(1)
3226 .m(1)
3227 .n(8)
3228 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08003229 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003230 }
3231
3232 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, strided_cn) {
3233 TEST_REQUIRES_ARM_NEON_V8;
3234 GemmMicrokernelTester()
3235 .mr(1)
3236 .nr(8)
3237 .kr(4)
3238 .sr(1)
3239 .m(1)
3240 .n(8)
3241 .k(16)
3242 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003243 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003244 }
3245
3246 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_eq_16_strided_a) {
3247 TEST_REQUIRES_ARM_NEON_V8;
3248 GemmMicrokernelTester()
3249 .mr(1)
3250 .nr(8)
3251 .kr(4)
3252 .sr(1)
3253 .m(1)
3254 .n(8)
3255 .k(16)
3256 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003257 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003258 }
3259
3260 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_eq_16_subtile) {
3261 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003262 for (uint32_t n = 1; n <= 8; n++) {
3263 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08003264 GemmMicrokernelTester()
3265 .mr(1)
3266 .nr(8)
3267 .kr(4)
3268 .sr(1)
3269 .m(m)
3270 .n(n)
3271 .k(16)
3272 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003273 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003274 }
3275 }
3276 }
3277
3278 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_eq_16_subtile_m) {
3279 TEST_REQUIRES_ARM_NEON_V8;
3280 for (uint32_t m = 1; m <= 1; m++) {
3281 GemmMicrokernelTester()
3282 .mr(1)
3283 .nr(8)
3284 .kr(4)
3285 .sr(1)
3286 .m(m)
3287 .n(8)
3288 .k(16)
3289 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003290 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003291 }
3292 }
3293
3294 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_eq_16_subtile_n) {
3295 TEST_REQUIRES_ARM_NEON_V8;
3296 for (uint32_t n = 1; n <= 8; n++) {
3297 GemmMicrokernelTester()
3298 .mr(1)
3299 .nr(8)
3300 .kr(4)
3301 .sr(1)
3302 .m(1)
3303 .n(n)
3304 .k(16)
3305 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003306 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003307 }
3308 }
3309
3310 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_lt_16) {
3311 TEST_REQUIRES_ARM_NEON_V8;
3312 for (size_t k = 1; k < 16; k++) {
3313 GemmMicrokernelTester()
3314 .mr(1)
3315 .nr(8)
3316 .kr(4)
3317 .sr(1)
3318 .m(1)
3319 .n(8)
3320 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003321 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003322 }
3323 }
3324
3325 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_lt_16_strided_a) {
3326 TEST_REQUIRES_ARM_NEON_V8;
3327 for (size_t k = 1; k < 16; k++) {
3328 GemmMicrokernelTester()
3329 .mr(1)
3330 .nr(8)
3331 .kr(4)
3332 .sr(1)
3333 .m(1)
3334 .n(8)
3335 .k(k)
3336 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003337 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003338 }
3339 }
3340
3341 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_lt_16_subtile) {
3342 TEST_REQUIRES_ARM_NEON_V8;
3343 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003344 for (uint32_t n = 1; n <= 8; n++) {
3345 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08003346 GemmMicrokernelTester()
3347 .mr(1)
3348 .nr(8)
3349 .kr(4)
3350 .sr(1)
3351 .m(m)
3352 .n(n)
3353 .k(k)
3354 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003355 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003356 }
3357 }
3358 }
3359 }
3360
3361 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_gt_16) {
3362 TEST_REQUIRES_ARM_NEON_V8;
3363 for (size_t k = 17; k < 32; k++) {
3364 GemmMicrokernelTester()
3365 .mr(1)
3366 .nr(8)
3367 .kr(4)
3368 .sr(1)
3369 .m(1)
3370 .n(8)
3371 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003372 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003373 }
3374 }
3375
3376 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_gt_16_strided_a) {
3377 TEST_REQUIRES_ARM_NEON_V8;
3378 for (size_t k = 17; k < 32; k++) {
3379 GemmMicrokernelTester()
3380 .mr(1)
3381 .nr(8)
3382 .kr(4)
3383 .sr(1)
3384 .m(1)
3385 .n(8)
3386 .k(k)
3387 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08003388 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003389 }
3390 }
3391
3392 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_gt_16_subtile) {
3393 TEST_REQUIRES_ARM_NEON_V8;
3394 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003395 for (uint32_t n = 1; n <= 8; n++) {
3396 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08003397 GemmMicrokernelTester()
3398 .mr(1)
3399 .nr(8)
3400 .kr(4)
3401 .sr(1)
3402 .m(m)
3403 .n(n)
3404 .k(k)
3405 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003406 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003407 }
3408 }
3409 }
3410 }
3411
3412 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_div_16) {
3413 TEST_REQUIRES_ARM_NEON_V8;
3414 for (size_t k = 32; k <= 160; k += 16) {
3415 GemmMicrokernelTester()
3416 .mr(1)
3417 .nr(8)
3418 .kr(4)
3419 .sr(1)
3420 .m(1)
3421 .n(8)
3422 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003423 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003424 }
3425 }
3426
3427 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_div_16_strided_a) {
3428 TEST_REQUIRES_ARM_NEON_V8;
3429 for (size_t k = 32; k <= 160; k += 16) {
3430 GemmMicrokernelTester()
3431 .mr(1)
3432 .nr(8)
3433 .kr(4)
3434 .sr(1)
3435 .m(1)
3436 .n(8)
3437 .k(k)
3438 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08003439 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003440 }
3441 }
3442
3443 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_div_16_subtile) {
3444 TEST_REQUIRES_ARM_NEON_V8;
3445 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003446 for (uint32_t n = 1; n <= 8; n++) {
3447 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08003448 GemmMicrokernelTester()
3449 .mr(1)
3450 .nr(8)
3451 .kr(4)
3452 .sr(1)
3453 .m(m)
3454 .n(n)
3455 .k(k)
3456 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003457 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003458 }
3459 }
3460 }
3461 }
3462
3463 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_gt_8) {
3464 TEST_REQUIRES_ARM_NEON_V8;
3465 for (uint32_t n = 9; n < 16; n++) {
3466 for (size_t k = 1; k <= 80; k += 17) {
3467 GemmMicrokernelTester()
3468 .mr(1)
3469 .nr(8)
3470 .kr(4)
3471 .sr(1)
3472 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003473 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -08003474 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003475 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003476 }
3477 }
3478 }
3479
3480 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_gt_8_strided_cn) {
3481 TEST_REQUIRES_ARM_NEON_V8;
3482 for (uint32_t n = 9; n < 16; n++) {
3483 for (size_t k = 1; k <= 80; k += 17) {
3484 GemmMicrokernelTester()
3485 .mr(1)
3486 .nr(8)
3487 .kr(4)
3488 .sr(1)
3489 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003490 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -08003491 .k(k)
3492 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003493 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003494 }
3495 }
3496 }
3497
3498 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_gt_8_strided_a) {
3499 TEST_REQUIRES_ARM_NEON_V8;
3500 for (uint32_t n = 9; n < 16; n++) {
3501 for (size_t k = 1; k <= 80; k += 17) {
3502 GemmMicrokernelTester()
3503 .mr(1)
3504 .nr(8)
3505 .kr(4)
3506 .sr(1)
3507 .m(1)
3508 .n(n)
3509 .k(k)
3510 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08003511 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003512 }
3513 }
3514 }
3515
3516 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_gt_8_subtile) {
3517 TEST_REQUIRES_ARM_NEON_V8;
3518 for (uint32_t n = 9; n < 16; n++) {
3519 for (size_t k = 1; k <= 80; k += 17) {
3520 for (uint32_t m = 1; m <= 1; m++) {
3521 GemmMicrokernelTester()
3522 .mr(1)
3523 .nr(8)
3524 .kr(4)
3525 .sr(1)
3526 .m(m)
3527 .n(n)
3528 .k(k)
3529 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003530 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003531 }
3532 }
3533 }
3534 }
3535
3536 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_div_8) {
3537 TEST_REQUIRES_ARM_NEON_V8;
3538 for (uint32_t n = 16; n <= 24; n += 8) {
3539 for (size_t k = 1; k <= 80; k += 17) {
3540 GemmMicrokernelTester()
3541 .mr(1)
3542 .nr(8)
3543 .kr(4)
3544 .sr(1)
3545 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003546 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -08003547 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003548 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003549 }
3550 }
3551 }
3552
3553 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_div_8_strided_cn) {
3554 TEST_REQUIRES_ARM_NEON_V8;
3555 for (uint32_t n = 16; n <= 24; n += 8) {
3556 for (size_t k = 1; k <= 80; k += 17) {
3557 GemmMicrokernelTester()
3558 .mr(1)
3559 .nr(8)
3560 .kr(4)
3561 .sr(1)
3562 .m(1)
3563 .n(n)
3564 .k(k)
3565 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003566 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003567 }
3568 }
3569 }
3570
3571 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_div_8_strided_a) {
3572 TEST_REQUIRES_ARM_NEON_V8;
3573 for (uint32_t n = 16; n <= 24; n += 8) {
3574 for (size_t k = 1; k <= 80; k += 17) {
3575 GemmMicrokernelTester()
3576 .mr(1)
3577 .nr(8)
3578 .kr(4)
3579 .sr(1)
3580 .m(1)
3581 .n(n)
3582 .k(k)
3583 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08003584 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003585 }
3586 }
3587 }
3588
3589 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_div_8_subtile) {
3590 TEST_REQUIRES_ARM_NEON_V8;
3591 for (uint32_t n = 16; n <= 24; n += 8) {
3592 for (size_t k = 1; k <= 80; k += 17) {
3593 for (uint32_t m = 1; m <= 1; m++) {
3594 GemmMicrokernelTester()
3595 .mr(1)
3596 .nr(8)
3597 .kr(4)
3598 .sr(1)
3599 .m(m)
3600 .n(n)
3601 .k(k)
3602 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003603 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003604 }
3605 }
3606 }
3607 }
3608
3609 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, strided_cm_subtile) {
3610 TEST_REQUIRES_ARM_NEON_V8;
3611 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003612 for (uint32_t n = 1; n <= 8; n++) {
3613 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08003614 GemmMicrokernelTester()
3615 .mr(1)
3616 .nr(8)
3617 .kr(4)
3618 .sr(1)
3619 .m(m)
3620 .n(n)
3621 .k(k)
3622 .cm_stride(11)
3623 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003624 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003625 }
3626 }
3627 }
3628 }
3629
3630 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, qmin) {
3631 TEST_REQUIRES_ARM_NEON_V8;
3632 GemmMicrokernelTester()
3633 .mr(1)
3634 .nr(8)
3635 .kr(4)
3636 .sr(1)
3637 .m(1)
3638 .n(8)
3639 .k(16)
3640 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003641 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003642 }
3643
3644 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, qmax) {
3645 TEST_REQUIRES_ARM_NEON_V8;
3646 GemmMicrokernelTester()
3647 .mr(1)
3648 .nr(8)
3649 .kr(4)
3650 .sr(1)
3651 .m(1)
3652 .n(8)
3653 .k(16)
3654 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003655 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003656 }
3657
3658 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, strided_cm) {
3659 TEST_REQUIRES_ARM_NEON_V8;
3660 GemmMicrokernelTester()
3661 .mr(1)
3662 .nr(8)
3663 .kr(4)
3664 .sr(1)
3665 .m(1)
3666 .n(8)
3667 .k(16)
3668 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003669 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003670 }
3671#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3672
3673
3674#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard64ab1b72021-11-22 10:57:40 -08003675 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_eq_16) {
3676 TEST_REQUIRES_ARM_NEON;
3677 GemmMicrokernelTester()
3678 .mr(1)
3679 .nr(8)
3680 .kr(4)
3681 .sr(1)
3682 .m(1)
3683 .n(8)
3684 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08003685 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003686 }
3687
3688 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, strided_cn) {
3689 TEST_REQUIRES_ARM_NEON;
3690 GemmMicrokernelTester()
3691 .mr(1)
3692 .nr(8)
3693 .kr(4)
3694 .sr(1)
3695 .m(1)
3696 .n(8)
3697 .k(16)
3698 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003699 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003700 }
3701
3702 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_eq_16_strided_a) {
3703 TEST_REQUIRES_ARM_NEON;
3704 GemmMicrokernelTester()
3705 .mr(1)
3706 .nr(8)
3707 .kr(4)
3708 .sr(1)
3709 .m(1)
3710 .n(8)
3711 .k(16)
3712 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003713 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003714 }
3715
3716 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile) {
3717 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003718 for (uint32_t n = 1; n <= 8; n++) {
3719 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08003720 GemmMicrokernelTester()
3721 .mr(1)
3722 .nr(8)
3723 .kr(4)
3724 .sr(1)
3725 .m(m)
3726 .n(n)
3727 .k(16)
3728 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003729 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003730 }
3731 }
3732 }
3733
3734 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
3735 TEST_REQUIRES_ARM_NEON;
3736 for (uint32_t m = 1; m <= 1; m++) {
3737 GemmMicrokernelTester()
3738 .mr(1)
3739 .nr(8)
3740 .kr(4)
3741 .sr(1)
3742 .m(m)
3743 .n(8)
3744 .k(16)
3745 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003746 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003747 }
3748 }
3749
3750 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
3751 TEST_REQUIRES_ARM_NEON;
3752 for (uint32_t n = 1; n <= 8; n++) {
3753 GemmMicrokernelTester()
3754 .mr(1)
3755 .nr(8)
3756 .kr(4)
3757 .sr(1)
3758 .m(1)
3759 .n(n)
3760 .k(16)
3761 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003762 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003763 }
3764 }
3765
3766 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_lt_16) {
3767 TEST_REQUIRES_ARM_NEON;
3768 for (size_t k = 1; k < 16; k++) {
3769 GemmMicrokernelTester()
3770 .mr(1)
3771 .nr(8)
3772 .kr(4)
3773 .sr(1)
3774 .m(1)
3775 .n(8)
3776 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003777 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003778 }
3779 }
3780
3781 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_lt_16_strided_a) {
3782 TEST_REQUIRES_ARM_NEON;
3783 for (size_t k = 1; k < 16; k++) {
3784 GemmMicrokernelTester()
3785 .mr(1)
3786 .nr(8)
3787 .kr(4)
3788 .sr(1)
3789 .m(1)
3790 .n(8)
3791 .k(k)
3792 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003793 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003794 }
3795 }
3796
3797 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_lt_16_subtile) {
3798 TEST_REQUIRES_ARM_NEON;
3799 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003800 for (uint32_t n = 1; n <= 8; n++) {
3801 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08003802 GemmMicrokernelTester()
3803 .mr(1)
3804 .nr(8)
3805 .kr(4)
3806 .sr(1)
3807 .m(m)
3808 .n(n)
3809 .k(k)
3810 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003811 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003812 }
3813 }
3814 }
3815 }
3816
3817 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_gt_16) {
3818 TEST_REQUIRES_ARM_NEON;
3819 for (size_t k = 17; k < 32; k++) {
3820 GemmMicrokernelTester()
3821 .mr(1)
3822 .nr(8)
3823 .kr(4)
3824 .sr(1)
3825 .m(1)
3826 .n(8)
3827 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003828 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003829 }
3830 }
3831
3832 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_gt_16_strided_a) {
3833 TEST_REQUIRES_ARM_NEON;
3834 for (size_t k = 17; k < 32; k++) {
3835 GemmMicrokernelTester()
3836 .mr(1)
3837 .nr(8)
3838 .kr(4)
3839 .sr(1)
3840 .m(1)
3841 .n(8)
3842 .k(k)
3843 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08003844 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003845 }
3846 }
3847
3848 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_gt_16_subtile) {
3849 TEST_REQUIRES_ARM_NEON;
3850 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003851 for (uint32_t n = 1; n <= 8; n++) {
3852 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08003853 GemmMicrokernelTester()
3854 .mr(1)
3855 .nr(8)
3856 .kr(4)
3857 .sr(1)
3858 .m(m)
3859 .n(n)
3860 .k(k)
3861 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003862 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003863 }
3864 }
3865 }
3866 }
3867
3868 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_div_16) {
3869 TEST_REQUIRES_ARM_NEON;
3870 for (size_t k = 32; k <= 160; k += 16) {
3871 GemmMicrokernelTester()
3872 .mr(1)
3873 .nr(8)
3874 .kr(4)
3875 .sr(1)
3876 .m(1)
3877 .n(8)
3878 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003879 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003880 }
3881 }
3882
3883 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_div_16_strided_a) {
3884 TEST_REQUIRES_ARM_NEON;
3885 for (size_t k = 32; k <= 160; k += 16) {
3886 GemmMicrokernelTester()
3887 .mr(1)
3888 .nr(8)
3889 .kr(4)
3890 .sr(1)
3891 .m(1)
3892 .n(8)
3893 .k(k)
3894 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08003895 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003896 }
3897 }
3898
3899 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_div_16_subtile) {
3900 TEST_REQUIRES_ARM_NEON;
3901 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003902 for (uint32_t n = 1; n <= 8; n++) {
3903 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08003904 GemmMicrokernelTester()
3905 .mr(1)
3906 .nr(8)
3907 .kr(4)
3908 .sr(1)
3909 .m(m)
3910 .n(n)
3911 .k(k)
3912 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003913 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003914 }
3915 }
3916 }
3917 }
3918
3919 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_gt_8) {
3920 TEST_REQUIRES_ARM_NEON;
3921 for (uint32_t n = 9; n < 16; n++) {
3922 for (size_t k = 1; k <= 80; k += 17) {
3923 GemmMicrokernelTester()
3924 .mr(1)
3925 .nr(8)
3926 .kr(4)
3927 .sr(1)
3928 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003929 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -08003930 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003931 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003932 }
3933 }
3934 }
3935
3936 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
3937 TEST_REQUIRES_ARM_NEON;
3938 for (uint32_t n = 9; n < 16; n++) {
3939 for (size_t k = 1; k <= 80; k += 17) {
3940 GemmMicrokernelTester()
3941 .mr(1)
3942 .nr(8)
3943 .kr(4)
3944 .sr(1)
3945 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003946 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -08003947 .k(k)
3948 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003949 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003950 }
3951 }
3952 }
3953
3954 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_gt_8_strided_a) {
3955 TEST_REQUIRES_ARM_NEON;
3956 for (uint32_t n = 9; n < 16; n++) {
3957 for (size_t k = 1; k <= 80; k += 17) {
3958 GemmMicrokernelTester()
3959 .mr(1)
3960 .nr(8)
3961 .kr(4)
3962 .sr(1)
3963 .m(1)
3964 .n(n)
3965 .k(k)
3966 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08003967 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003968 }
3969 }
3970 }
3971
3972 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_gt_8_subtile) {
3973 TEST_REQUIRES_ARM_NEON;
3974 for (uint32_t n = 9; n < 16; n++) {
3975 for (size_t k = 1; k <= 80; k += 17) {
3976 for (uint32_t m = 1; m <= 1; m++) {
3977 GemmMicrokernelTester()
3978 .mr(1)
3979 .nr(8)
3980 .kr(4)
3981 .sr(1)
3982 .m(m)
3983 .n(n)
3984 .k(k)
3985 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003986 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08003987 }
3988 }
3989 }
3990 }
3991
3992 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_div_8) {
3993 TEST_REQUIRES_ARM_NEON;
3994 for (uint32_t n = 16; n <= 24; n += 8) {
3995 for (size_t k = 1; k <= 80; k += 17) {
3996 GemmMicrokernelTester()
3997 .mr(1)
3998 .nr(8)
3999 .kr(4)
4000 .sr(1)
4001 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004002 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -08004003 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004004 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004005 }
4006 }
4007 }
4008
4009 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_div_8_strided_cn) {
4010 TEST_REQUIRES_ARM_NEON;
4011 for (uint32_t n = 16; n <= 24; n += 8) {
4012 for (size_t k = 1; k <= 80; k += 17) {
4013 GemmMicrokernelTester()
4014 .mr(1)
4015 .nr(8)
4016 .kr(4)
4017 .sr(1)
4018 .m(1)
4019 .n(n)
4020 .k(k)
4021 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004022 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004023 }
4024 }
4025 }
4026
4027 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_div_8_strided_a) {
4028 TEST_REQUIRES_ARM_NEON;
4029 for (uint32_t n = 16; n <= 24; n += 8) {
4030 for (size_t k = 1; k <= 80; k += 17) {
4031 GemmMicrokernelTester()
4032 .mr(1)
4033 .nr(8)
4034 .kr(4)
4035 .sr(1)
4036 .m(1)
4037 .n(n)
4038 .k(k)
4039 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08004040 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004041 }
4042 }
4043 }
4044
4045 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_div_8_subtile) {
4046 TEST_REQUIRES_ARM_NEON;
4047 for (uint32_t n = 16; n <= 24; n += 8) {
4048 for (size_t k = 1; k <= 80; k += 17) {
4049 for (uint32_t m = 1; m <= 1; m++) {
4050 GemmMicrokernelTester()
4051 .mr(1)
4052 .nr(8)
4053 .kr(4)
4054 .sr(1)
4055 .m(m)
4056 .n(n)
4057 .k(k)
4058 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004059 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004060 }
4061 }
4062 }
4063 }
4064
4065 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, strided_cm_subtile) {
4066 TEST_REQUIRES_ARM_NEON;
4067 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004068 for (uint32_t n = 1; n <= 8; n++) {
4069 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08004070 GemmMicrokernelTester()
4071 .mr(1)
4072 .nr(8)
4073 .kr(4)
4074 .sr(1)
4075 .m(m)
4076 .n(n)
4077 .k(k)
4078 .cm_stride(11)
4079 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004080 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004081 }
4082 }
4083 }
4084 }
4085
4086 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, qmin) {
4087 TEST_REQUIRES_ARM_NEON;
4088 GemmMicrokernelTester()
4089 .mr(1)
4090 .nr(8)
4091 .kr(4)
4092 .sr(1)
4093 .m(1)
4094 .n(8)
4095 .k(16)
4096 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004097 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004098 }
4099
4100 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, qmax) {
4101 TEST_REQUIRES_ARM_NEON;
4102 GemmMicrokernelTester()
4103 .mr(1)
4104 .nr(8)
4105 .kr(4)
4106 .sr(1)
4107 .m(1)
4108 .n(8)
4109 .k(16)
4110 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004111 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004112 }
4113
4114 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, strided_cm) {
4115 TEST_REQUIRES_ARM_NEON;
4116 GemmMicrokernelTester()
4117 .mr(1)
4118 .nr(8)
4119 .kr(4)
4120 .sr(1)
4121 .m(1)
4122 .n(8)
4123 .k(16)
4124 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004125 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004126 }
4127#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4128
4129
4130#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard64ab1b72021-11-22 10:57:40 -08004131 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_eq_16) {
4132 TEST_REQUIRES_ARM_NEON_V8;
4133 GemmMicrokernelTester()
4134 .mr(2)
4135 .nr(8)
4136 .kr(4)
4137 .sr(1)
4138 .m(2)
4139 .n(8)
4140 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08004141 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004142 }
4143
4144 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, strided_cn) {
4145 TEST_REQUIRES_ARM_NEON_V8;
4146 GemmMicrokernelTester()
4147 .mr(2)
4148 .nr(8)
4149 .kr(4)
4150 .sr(1)
4151 .m(2)
4152 .n(8)
4153 .k(16)
4154 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004155 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004156 }
4157
4158 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_eq_16_strided_a) {
4159 TEST_REQUIRES_ARM_NEON_V8;
4160 GemmMicrokernelTester()
4161 .mr(2)
4162 .nr(8)
4163 .kr(4)
4164 .sr(1)
4165 .m(2)
4166 .n(8)
4167 .k(16)
4168 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08004169 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004170 }
4171
4172 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_eq_16_subtile) {
4173 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004174 for (uint32_t n = 1; n <= 8; n++) {
4175 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08004176 GemmMicrokernelTester()
4177 .mr(2)
4178 .nr(8)
4179 .kr(4)
4180 .sr(1)
4181 .m(m)
4182 .n(n)
4183 .k(16)
4184 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004185 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004186 }
4187 }
4188 }
4189
4190 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_eq_16_subtile_m) {
4191 TEST_REQUIRES_ARM_NEON_V8;
4192 for (uint32_t m = 1; m <= 2; m++) {
4193 GemmMicrokernelTester()
4194 .mr(2)
4195 .nr(8)
4196 .kr(4)
4197 .sr(1)
4198 .m(m)
4199 .n(8)
4200 .k(16)
4201 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004202 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004203 }
4204 }
4205
4206 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_eq_16_subtile_n) {
4207 TEST_REQUIRES_ARM_NEON_V8;
4208 for (uint32_t n = 1; n <= 8; n++) {
4209 GemmMicrokernelTester()
4210 .mr(2)
4211 .nr(8)
4212 .kr(4)
4213 .sr(1)
4214 .m(2)
4215 .n(n)
4216 .k(16)
4217 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004218 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004219 }
4220 }
4221
4222 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_lt_16) {
4223 TEST_REQUIRES_ARM_NEON_V8;
4224 for (size_t k = 1; k < 16; k++) {
4225 GemmMicrokernelTester()
4226 .mr(2)
4227 .nr(8)
4228 .kr(4)
4229 .sr(1)
4230 .m(2)
4231 .n(8)
4232 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004233 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004234 }
4235 }
4236
4237 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_lt_16_strided_a) {
4238 TEST_REQUIRES_ARM_NEON_V8;
4239 for (size_t k = 1; k < 16; k++) {
4240 GemmMicrokernelTester()
4241 .mr(2)
4242 .nr(8)
4243 .kr(4)
4244 .sr(1)
4245 .m(2)
4246 .n(8)
4247 .k(k)
4248 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08004249 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004250 }
4251 }
4252
4253 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_lt_16_subtile) {
4254 TEST_REQUIRES_ARM_NEON_V8;
4255 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004256 for (uint32_t n = 1; n <= 8; n++) {
4257 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08004258 GemmMicrokernelTester()
4259 .mr(2)
4260 .nr(8)
4261 .kr(4)
4262 .sr(1)
4263 .m(m)
4264 .n(n)
4265 .k(k)
4266 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004267 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004268 }
4269 }
4270 }
4271 }
4272
4273 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_gt_16) {
4274 TEST_REQUIRES_ARM_NEON_V8;
4275 for (size_t k = 17; k < 32; k++) {
4276 GemmMicrokernelTester()
4277 .mr(2)
4278 .nr(8)
4279 .kr(4)
4280 .sr(1)
4281 .m(2)
4282 .n(8)
4283 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004284 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004285 }
4286 }
4287
4288 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_gt_16_strided_a) {
4289 TEST_REQUIRES_ARM_NEON_V8;
4290 for (size_t k = 17; k < 32; k++) {
4291 GemmMicrokernelTester()
4292 .mr(2)
4293 .nr(8)
4294 .kr(4)
4295 .sr(1)
4296 .m(2)
4297 .n(8)
4298 .k(k)
4299 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08004300 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004301 }
4302 }
4303
4304 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_gt_16_subtile) {
4305 TEST_REQUIRES_ARM_NEON_V8;
4306 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004307 for (uint32_t n = 1; n <= 8; n++) {
4308 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08004309 GemmMicrokernelTester()
4310 .mr(2)
4311 .nr(8)
4312 .kr(4)
4313 .sr(1)
4314 .m(m)
4315 .n(n)
4316 .k(k)
4317 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004318 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004319 }
4320 }
4321 }
4322 }
4323
4324 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_div_16) {
4325 TEST_REQUIRES_ARM_NEON_V8;
4326 for (size_t k = 32; k <= 160; k += 16) {
4327 GemmMicrokernelTester()
4328 .mr(2)
4329 .nr(8)
4330 .kr(4)
4331 .sr(1)
4332 .m(2)
4333 .n(8)
4334 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004335 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004336 }
4337 }
4338
4339 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_div_16_strided_a) {
4340 TEST_REQUIRES_ARM_NEON_V8;
4341 for (size_t k = 32; k <= 160; k += 16) {
4342 GemmMicrokernelTester()
4343 .mr(2)
4344 .nr(8)
4345 .kr(4)
4346 .sr(1)
4347 .m(2)
4348 .n(8)
4349 .k(k)
4350 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08004351 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004352 }
4353 }
4354
4355 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_div_16_subtile) {
4356 TEST_REQUIRES_ARM_NEON_V8;
4357 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004358 for (uint32_t n = 1; n <= 8; n++) {
4359 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08004360 GemmMicrokernelTester()
4361 .mr(2)
4362 .nr(8)
4363 .kr(4)
4364 .sr(1)
4365 .m(m)
4366 .n(n)
4367 .k(k)
4368 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004369 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004370 }
4371 }
4372 }
4373 }
4374
4375 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_gt_8) {
4376 TEST_REQUIRES_ARM_NEON_V8;
4377 for (uint32_t n = 9; n < 16; n++) {
4378 for (size_t k = 1; k <= 80; k += 17) {
4379 GemmMicrokernelTester()
4380 .mr(2)
4381 .nr(8)
4382 .kr(4)
4383 .sr(1)
4384 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004385 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -08004386 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004387 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004388 }
4389 }
4390 }
4391
4392 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_gt_8_strided_cn) {
4393 TEST_REQUIRES_ARM_NEON_V8;
4394 for (uint32_t n = 9; n < 16; n++) {
4395 for (size_t k = 1; k <= 80; k += 17) {
4396 GemmMicrokernelTester()
4397 .mr(2)
4398 .nr(8)
4399 .kr(4)
4400 .sr(1)
4401 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004402 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -08004403 .k(k)
4404 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004405 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004406 }
4407 }
4408 }
4409
4410 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_gt_8_strided_a) {
4411 TEST_REQUIRES_ARM_NEON_V8;
4412 for (uint32_t n = 9; n < 16; n++) {
4413 for (size_t k = 1; k <= 80; k += 17) {
4414 GemmMicrokernelTester()
4415 .mr(2)
4416 .nr(8)
4417 .kr(4)
4418 .sr(1)
4419 .m(2)
4420 .n(n)
4421 .k(k)
4422 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08004423 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004424 }
4425 }
4426 }
4427
4428 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_gt_8_subtile) {
4429 TEST_REQUIRES_ARM_NEON_V8;
4430 for (uint32_t n = 9; n < 16; n++) {
4431 for (size_t k = 1; k <= 80; k += 17) {
4432 for (uint32_t m = 1; m <= 2; m++) {
4433 GemmMicrokernelTester()
4434 .mr(2)
4435 .nr(8)
4436 .kr(4)
4437 .sr(1)
4438 .m(m)
4439 .n(n)
4440 .k(k)
4441 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004442 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004443 }
4444 }
4445 }
4446 }
4447
4448 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_div_8) {
4449 TEST_REQUIRES_ARM_NEON_V8;
4450 for (uint32_t n = 16; n <= 24; n += 8) {
4451 for (size_t k = 1; k <= 80; k += 17) {
4452 GemmMicrokernelTester()
4453 .mr(2)
4454 .nr(8)
4455 .kr(4)
4456 .sr(1)
4457 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004458 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -08004459 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004460 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004461 }
4462 }
4463 }
4464
4465 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_div_8_strided_cn) {
4466 TEST_REQUIRES_ARM_NEON_V8;
4467 for (uint32_t n = 16; n <= 24; n += 8) {
4468 for (size_t k = 1; k <= 80; k += 17) {
4469 GemmMicrokernelTester()
4470 .mr(2)
4471 .nr(8)
4472 .kr(4)
4473 .sr(1)
4474 .m(2)
4475 .n(n)
4476 .k(k)
4477 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004478 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004479 }
4480 }
4481 }
4482
4483 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_div_8_strided_a) {
4484 TEST_REQUIRES_ARM_NEON_V8;
4485 for (uint32_t n = 16; n <= 24; n += 8) {
4486 for (size_t k = 1; k <= 80; k += 17) {
4487 GemmMicrokernelTester()
4488 .mr(2)
4489 .nr(8)
4490 .kr(4)
4491 .sr(1)
4492 .m(2)
4493 .n(n)
4494 .k(k)
4495 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08004496 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004497 }
4498 }
4499 }
4500
4501 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_div_8_subtile) {
4502 TEST_REQUIRES_ARM_NEON_V8;
4503 for (uint32_t n = 16; n <= 24; n += 8) {
4504 for (size_t k = 1; k <= 80; k += 17) {
4505 for (uint32_t m = 1; m <= 2; m++) {
4506 GemmMicrokernelTester()
4507 .mr(2)
4508 .nr(8)
4509 .kr(4)
4510 .sr(1)
4511 .m(m)
4512 .n(n)
4513 .k(k)
4514 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004515 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004516 }
4517 }
4518 }
4519 }
4520
4521 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, strided_cm_subtile) {
4522 TEST_REQUIRES_ARM_NEON_V8;
4523 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004524 for (uint32_t n = 1; n <= 8; n++) {
4525 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -08004526 GemmMicrokernelTester()
4527 .mr(2)
4528 .nr(8)
4529 .kr(4)
4530 .sr(1)
4531 .m(m)
4532 .n(n)
4533 .k(k)
4534 .cm_stride(11)
4535 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004536 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004537 }
4538 }
4539 }
4540 }
4541
4542 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, qmin) {
4543 TEST_REQUIRES_ARM_NEON_V8;
4544 GemmMicrokernelTester()
4545 .mr(2)
4546 .nr(8)
4547 .kr(4)
4548 .sr(1)
4549 .m(2)
4550 .n(8)
4551 .k(16)
4552 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004553 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004554 }
4555
4556 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, qmax) {
4557 TEST_REQUIRES_ARM_NEON_V8;
4558 GemmMicrokernelTester()
4559 .mr(2)
4560 .nr(8)
4561 .kr(4)
4562 .sr(1)
4563 .m(2)
4564 .n(8)
4565 .k(16)
4566 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004567 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004568 }
4569
4570 TEST(QS8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, strided_cm) {
4571 TEST_REQUIRES_ARM_NEON_V8;
4572 GemmMicrokernelTester()
4573 .mr(2)
4574 .nr(8)
4575 .kr(4)
4576 .sr(1)
4577 .m(2)
4578 .n(8)
4579 .k(16)
4580 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004581 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard64ab1b72021-11-22 10:57:40 -08004582 }
4583#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4584
4585
4586#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharde22685a2021-11-12 11:36:58 -08004587 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_eq_16) {
Frank Barchard287952a2021-11-03 15:26:45 -07004588 TEST_REQUIRES_ARM_NEON_V8;
4589 GemmMicrokernelTester()
4590 .mr(1)
4591 .nr(8)
4592 .kr(2)
4593 .sr(1)
4594 .m(1)
4595 .n(8)
4596 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08004597 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07004598 }
4599
Frank Barcharde22685a2021-11-12 11:36:58 -08004600 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -07004601 TEST_REQUIRES_ARM_NEON_V8;
4602 GemmMicrokernelTester()
4603 .mr(1)
4604 .nr(8)
4605 .kr(2)
4606 .sr(1)
4607 .m(1)
4608 .n(8)
4609 .k(16)
4610 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004611 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07004612 }
4613
Frank Barcharde22685a2021-11-12 11:36:58 -08004614 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_eq_16_strided_a) {
Frank Barchard287952a2021-11-03 15:26:45 -07004615 TEST_REQUIRES_ARM_NEON_V8;
4616 GemmMicrokernelTester()
4617 .mr(1)
4618 .nr(8)
4619 .kr(2)
4620 .sr(1)
4621 .m(1)
4622 .n(8)
4623 .k(16)
4624 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08004625 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07004626 }
4627
Frank Barcharde22685a2021-11-12 11:36:58 -08004628 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_eq_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07004629 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004630 for (uint32_t n = 1; n <= 8; n++) {
4631 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07004632 GemmMicrokernelTester()
4633 .mr(1)
4634 .nr(8)
4635 .kr(2)
4636 .sr(1)
4637 .m(m)
4638 .n(n)
4639 .k(16)
4640 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004641 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07004642 }
4643 }
4644 }
4645
Frank Barcharde22685a2021-11-12 11:36:58 -08004646 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_eq_16_subtile_m) {
Frank Barchard287952a2021-11-03 15:26:45 -07004647 TEST_REQUIRES_ARM_NEON_V8;
4648 for (uint32_t m = 1; m <= 1; m++) {
4649 GemmMicrokernelTester()
4650 .mr(1)
4651 .nr(8)
4652 .kr(2)
4653 .sr(1)
4654 .m(m)
4655 .n(8)
4656 .k(16)
4657 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004658 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07004659 }
4660 }
4661
Frank Barcharde22685a2021-11-12 11:36:58 -08004662 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_eq_16_subtile_n) {
Frank Barchard287952a2021-11-03 15:26:45 -07004663 TEST_REQUIRES_ARM_NEON_V8;
4664 for (uint32_t n = 1; n <= 8; n++) {
4665 GemmMicrokernelTester()
4666 .mr(1)
4667 .nr(8)
4668 .kr(2)
4669 .sr(1)
4670 .m(1)
4671 .n(n)
4672 .k(16)
4673 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004674 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07004675 }
4676 }
4677
Frank Barcharde22685a2021-11-12 11:36:58 -08004678 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_lt_16) {
Frank Barchard287952a2021-11-03 15:26:45 -07004679 TEST_REQUIRES_ARM_NEON_V8;
4680 for (size_t k = 1; k < 16; k++) {
4681 GemmMicrokernelTester()
4682 .mr(1)
4683 .nr(8)
4684 .kr(2)
4685 .sr(1)
4686 .m(1)
4687 .n(8)
4688 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004689 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07004690 }
4691 }
4692
Frank Barcharde22685a2021-11-12 11:36:58 -08004693 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_lt_16_strided_a) {
Frank Barchard287952a2021-11-03 15:26:45 -07004694 TEST_REQUIRES_ARM_NEON_V8;
4695 for (size_t k = 1; k < 16; k++) {
4696 GemmMicrokernelTester()
4697 .mr(1)
4698 .nr(8)
4699 .kr(2)
4700 .sr(1)
4701 .m(1)
4702 .n(8)
4703 .k(k)
4704 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08004705 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07004706 }
4707 }
4708
Frank Barcharde22685a2021-11-12 11:36:58 -08004709 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_lt_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07004710 TEST_REQUIRES_ARM_NEON_V8;
4711 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004712 for (uint32_t n = 1; n <= 8; n++) {
4713 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07004714 GemmMicrokernelTester()
4715 .mr(1)
4716 .nr(8)
4717 .kr(2)
4718 .sr(1)
4719 .m(m)
4720 .n(n)
4721 .k(k)
4722 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004723 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07004724 }
4725 }
4726 }
4727 }
4728
Frank Barcharde22685a2021-11-12 11:36:58 -08004729 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_gt_16) {
Frank Barchard287952a2021-11-03 15:26:45 -07004730 TEST_REQUIRES_ARM_NEON_V8;
4731 for (size_t k = 17; k < 32; k++) {
4732 GemmMicrokernelTester()
4733 .mr(1)
4734 .nr(8)
4735 .kr(2)
4736 .sr(1)
4737 .m(1)
4738 .n(8)
4739 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004740 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07004741 }
4742 }
4743
Frank Barcharde22685a2021-11-12 11:36:58 -08004744 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_gt_16_strided_a) {
Frank Barchard287952a2021-11-03 15:26:45 -07004745 TEST_REQUIRES_ARM_NEON_V8;
4746 for (size_t k = 17; k < 32; k++) {
4747 GemmMicrokernelTester()
4748 .mr(1)
4749 .nr(8)
4750 .kr(2)
4751 .sr(1)
4752 .m(1)
4753 .n(8)
4754 .k(k)
4755 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08004756 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07004757 }
4758 }
4759
Frank Barcharde22685a2021-11-12 11:36:58 -08004760 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_gt_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07004761 TEST_REQUIRES_ARM_NEON_V8;
4762 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004763 for (uint32_t n = 1; n <= 8; n++) {
4764 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07004765 GemmMicrokernelTester()
4766 .mr(1)
4767 .nr(8)
4768 .kr(2)
4769 .sr(1)
4770 .m(m)
4771 .n(n)
4772 .k(k)
4773 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004774 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07004775 }
4776 }
4777 }
4778 }
4779
Frank Barcharde22685a2021-11-12 11:36:58 -08004780 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_div_16) {
Frank Barchard287952a2021-11-03 15:26:45 -07004781 TEST_REQUIRES_ARM_NEON_V8;
4782 for (size_t k = 32; k <= 160; k += 16) {
4783 GemmMicrokernelTester()
4784 .mr(1)
4785 .nr(8)
4786 .kr(2)
4787 .sr(1)
4788 .m(1)
4789 .n(8)
4790 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004791 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07004792 }
4793 }
4794
Frank Barcharde22685a2021-11-12 11:36:58 -08004795 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_div_16_strided_a) {
Frank Barchard287952a2021-11-03 15:26:45 -07004796 TEST_REQUIRES_ARM_NEON_V8;
4797 for (size_t k = 32; k <= 160; k += 16) {
4798 GemmMicrokernelTester()
4799 .mr(1)
4800 .nr(8)
4801 .kr(2)
4802 .sr(1)
4803 .m(1)
4804 .n(8)
4805 .k(k)
4806 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08004807 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07004808 }
4809 }
4810
Frank Barcharde22685a2021-11-12 11:36:58 -08004811 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_div_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07004812 TEST_REQUIRES_ARM_NEON_V8;
4813 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004814 for (uint32_t n = 1; n <= 8; n++) {
4815 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07004816 GemmMicrokernelTester()
4817 .mr(1)
4818 .nr(8)
4819 .kr(2)
4820 .sr(1)
4821 .m(m)
4822 .n(n)
4823 .k(k)
4824 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004825 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07004826 }
4827 }
4828 }
4829 }
4830
Frank Barcharde22685a2021-11-12 11:36:58 -08004831 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_gt_8) {
Frank Barchard287952a2021-11-03 15:26:45 -07004832 TEST_REQUIRES_ARM_NEON_V8;
4833 for (uint32_t n = 9; n < 16; n++) {
4834 for (size_t k = 1; k <= 80; k += 17) {
4835 GemmMicrokernelTester()
4836 .mr(1)
4837 .nr(8)
4838 .kr(2)
4839 .sr(1)
4840 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004841 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -07004842 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004843 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07004844 }
4845 }
4846 }
4847
Frank Barcharde22685a2021-11-12 11:36:58 -08004848 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_gt_8_strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -07004849 TEST_REQUIRES_ARM_NEON_V8;
4850 for (uint32_t n = 9; n < 16; n++) {
4851 for (size_t k = 1; k <= 80; k += 17) {
4852 GemmMicrokernelTester()
4853 .mr(1)
4854 .nr(8)
4855 .kr(2)
4856 .sr(1)
4857 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004858 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -07004859 .k(k)
4860 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004861 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07004862 }
4863 }
4864 }
4865
Frank Barcharde22685a2021-11-12 11:36:58 -08004866 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_gt_8_strided_a) {
Frank Barchard287952a2021-11-03 15:26:45 -07004867 TEST_REQUIRES_ARM_NEON_V8;
4868 for (uint32_t n = 9; n < 16; n++) {
4869 for (size_t k = 1; k <= 80; k += 17) {
4870 GemmMicrokernelTester()
4871 .mr(1)
4872 .nr(8)
4873 .kr(2)
4874 .sr(1)
4875 .m(1)
4876 .n(n)
4877 .k(k)
4878 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08004879 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07004880 }
4881 }
4882 }
4883
Frank Barcharde22685a2021-11-12 11:36:58 -08004884 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_gt_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07004885 TEST_REQUIRES_ARM_NEON_V8;
4886 for (uint32_t n = 9; n < 16; n++) {
4887 for (size_t k = 1; k <= 80; k += 17) {
4888 for (uint32_t m = 1; m <= 1; m++) {
4889 GemmMicrokernelTester()
4890 .mr(1)
4891 .nr(8)
4892 .kr(2)
4893 .sr(1)
4894 .m(m)
4895 .n(n)
4896 .k(k)
4897 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004898 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07004899 }
4900 }
4901 }
4902 }
4903
Frank Barcharde22685a2021-11-12 11:36:58 -08004904 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_div_8) {
Frank Barchard287952a2021-11-03 15:26:45 -07004905 TEST_REQUIRES_ARM_NEON_V8;
4906 for (uint32_t n = 16; n <= 24; n += 8) {
4907 for (size_t k = 1; k <= 80; k += 17) {
4908 GemmMicrokernelTester()
4909 .mr(1)
4910 .nr(8)
4911 .kr(2)
4912 .sr(1)
4913 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004914 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -07004915 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004916 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07004917 }
4918 }
4919 }
4920
Frank Barcharde22685a2021-11-12 11:36:58 -08004921 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_div_8_strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -07004922 TEST_REQUIRES_ARM_NEON_V8;
4923 for (uint32_t n = 16; n <= 24; n += 8) {
4924 for (size_t k = 1; k <= 80; k += 17) {
4925 GemmMicrokernelTester()
4926 .mr(1)
4927 .nr(8)
4928 .kr(2)
4929 .sr(1)
4930 .m(1)
4931 .n(n)
4932 .k(k)
4933 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004934 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07004935 }
4936 }
4937 }
4938
Frank Barcharde22685a2021-11-12 11:36:58 -08004939 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_div_8_strided_a) {
Frank Barchard287952a2021-11-03 15:26:45 -07004940 TEST_REQUIRES_ARM_NEON_V8;
4941 for (uint32_t n = 16; n <= 24; n += 8) {
4942 for (size_t k = 1; k <= 80; k += 17) {
4943 GemmMicrokernelTester()
4944 .mr(1)
4945 .nr(8)
4946 .kr(2)
4947 .sr(1)
4948 .m(1)
4949 .n(n)
4950 .k(k)
4951 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08004952 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07004953 }
4954 }
4955 }
4956
Frank Barcharde22685a2021-11-12 11:36:58 -08004957 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_div_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07004958 TEST_REQUIRES_ARM_NEON_V8;
4959 for (uint32_t n = 16; n <= 24; n += 8) {
4960 for (size_t k = 1; k <= 80; k += 17) {
4961 for (uint32_t m = 1; m <= 1; m++) {
4962 GemmMicrokernelTester()
4963 .mr(1)
4964 .nr(8)
4965 .kr(2)
4966 .sr(1)
4967 .m(m)
4968 .n(n)
4969 .k(k)
4970 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004971 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07004972 }
4973 }
4974 }
4975 }
4976
Frank Barcharde22685a2021-11-12 11:36:58 -08004977 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, strided_cm_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07004978 TEST_REQUIRES_ARM_NEON_V8;
4979 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004980 for (uint32_t n = 1; n <= 8; n++) {
4981 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07004982 GemmMicrokernelTester()
4983 .mr(1)
4984 .nr(8)
4985 .kr(2)
4986 .sr(1)
4987 .m(m)
4988 .n(n)
4989 .k(k)
4990 .cm_stride(11)
4991 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004992 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07004993 }
4994 }
4995 }
4996 }
4997
Frank Barcharde22685a2021-11-12 11:36:58 -08004998 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, qmin) {
Frank Barchard287952a2021-11-03 15:26:45 -07004999 TEST_REQUIRES_ARM_NEON_V8;
5000 GemmMicrokernelTester()
5001 .mr(1)
5002 .nr(8)
5003 .kr(2)
5004 .sr(1)
5005 .m(1)
5006 .n(8)
5007 .k(16)
5008 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005009 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005010 }
5011
Frank Barcharde22685a2021-11-12 11:36:58 -08005012 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, qmax) {
Frank Barchard287952a2021-11-03 15:26:45 -07005013 TEST_REQUIRES_ARM_NEON_V8;
5014 GemmMicrokernelTester()
5015 .mr(1)
5016 .nr(8)
5017 .kr(2)
5018 .sr(1)
5019 .m(1)
5020 .n(8)
5021 .k(16)
5022 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005023 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005024 }
5025
Frank Barcharde22685a2021-11-12 11:36:58 -08005026 TEST(QS8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, strided_cm) {
Frank Barchard287952a2021-11-03 15:26:45 -07005027 TEST_REQUIRES_ARM_NEON_V8;
5028 GemmMicrokernelTester()
5029 .mr(1)
5030 .nr(8)
5031 .kr(2)
5032 .sr(1)
5033 .m(1)
5034 .n(8)
5035 .k(16)
5036 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005037 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005038 }
5039#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5040
5041
5042#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharde22685a2021-11-12 11:36:58 -08005043 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_eq_16) {
Frank Barchard287952a2021-11-03 15:26:45 -07005044 TEST_REQUIRES_ARM_NEON_V8;
5045 GemmMicrokernelTester()
5046 .mr(2)
5047 .nr(8)
5048 .kr(2)
5049 .sr(1)
5050 .m(2)
5051 .n(8)
5052 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08005053 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005054 }
5055
Frank Barcharde22685a2021-11-12 11:36:58 -08005056 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -07005057 TEST_REQUIRES_ARM_NEON_V8;
5058 GemmMicrokernelTester()
5059 .mr(2)
5060 .nr(8)
5061 .kr(2)
5062 .sr(1)
5063 .m(2)
5064 .n(8)
5065 .k(16)
5066 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005067 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005068 }
5069
Frank Barcharde22685a2021-11-12 11:36:58 -08005070 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_eq_16_strided_a) {
Frank Barchard287952a2021-11-03 15:26:45 -07005071 TEST_REQUIRES_ARM_NEON_V8;
5072 GemmMicrokernelTester()
5073 .mr(2)
5074 .nr(8)
5075 .kr(2)
5076 .sr(1)
5077 .m(2)
5078 .n(8)
5079 .k(16)
5080 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005081 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005082 }
5083
Frank Barcharde22685a2021-11-12 11:36:58 -08005084 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_eq_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07005085 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005086 for (uint32_t n = 1; n <= 8; n++) {
5087 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07005088 GemmMicrokernelTester()
5089 .mr(2)
5090 .nr(8)
5091 .kr(2)
5092 .sr(1)
5093 .m(m)
5094 .n(n)
5095 .k(16)
5096 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005097 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005098 }
5099 }
5100 }
5101
Frank Barcharde22685a2021-11-12 11:36:58 -08005102 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_eq_16_subtile_m) {
Frank Barchard287952a2021-11-03 15:26:45 -07005103 TEST_REQUIRES_ARM_NEON_V8;
5104 for (uint32_t m = 1; m <= 2; m++) {
5105 GemmMicrokernelTester()
5106 .mr(2)
5107 .nr(8)
5108 .kr(2)
5109 .sr(1)
5110 .m(m)
5111 .n(8)
5112 .k(16)
5113 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005114 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005115 }
5116 }
5117
Frank Barcharde22685a2021-11-12 11:36:58 -08005118 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_eq_16_subtile_n) {
Frank Barchard287952a2021-11-03 15:26:45 -07005119 TEST_REQUIRES_ARM_NEON_V8;
5120 for (uint32_t n = 1; n <= 8; n++) {
5121 GemmMicrokernelTester()
5122 .mr(2)
5123 .nr(8)
5124 .kr(2)
5125 .sr(1)
5126 .m(2)
5127 .n(n)
5128 .k(16)
5129 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005130 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005131 }
5132 }
5133
Frank Barcharde22685a2021-11-12 11:36:58 -08005134 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_lt_16) {
Frank Barchard287952a2021-11-03 15:26:45 -07005135 TEST_REQUIRES_ARM_NEON_V8;
5136 for (size_t k = 1; k < 16; k++) {
5137 GemmMicrokernelTester()
5138 .mr(2)
5139 .nr(8)
5140 .kr(2)
5141 .sr(1)
5142 .m(2)
5143 .n(8)
5144 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005145 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005146 }
5147 }
5148
Frank Barcharde22685a2021-11-12 11:36:58 -08005149 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_lt_16_strided_a) {
Frank Barchard287952a2021-11-03 15:26:45 -07005150 TEST_REQUIRES_ARM_NEON_V8;
5151 for (size_t k = 1; k < 16; k++) {
5152 GemmMicrokernelTester()
5153 .mr(2)
5154 .nr(8)
5155 .kr(2)
5156 .sr(1)
5157 .m(2)
5158 .n(8)
5159 .k(k)
5160 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005161 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005162 }
5163 }
5164
Frank Barcharde22685a2021-11-12 11:36:58 -08005165 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_lt_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07005166 TEST_REQUIRES_ARM_NEON_V8;
5167 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005168 for (uint32_t n = 1; n <= 8; n++) {
5169 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07005170 GemmMicrokernelTester()
5171 .mr(2)
5172 .nr(8)
5173 .kr(2)
5174 .sr(1)
5175 .m(m)
5176 .n(n)
5177 .k(k)
5178 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005179 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005180 }
5181 }
5182 }
5183 }
5184
Frank Barcharde22685a2021-11-12 11:36:58 -08005185 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_gt_16) {
Frank Barchard287952a2021-11-03 15:26:45 -07005186 TEST_REQUIRES_ARM_NEON_V8;
5187 for (size_t k = 17; k < 32; k++) {
5188 GemmMicrokernelTester()
5189 .mr(2)
5190 .nr(8)
5191 .kr(2)
5192 .sr(1)
5193 .m(2)
5194 .n(8)
5195 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005196 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005197 }
5198 }
5199
Frank Barcharde22685a2021-11-12 11:36:58 -08005200 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_gt_16_strided_a) {
Frank Barchard287952a2021-11-03 15:26:45 -07005201 TEST_REQUIRES_ARM_NEON_V8;
5202 for (size_t k = 17; k < 32; k++) {
5203 GemmMicrokernelTester()
5204 .mr(2)
5205 .nr(8)
5206 .kr(2)
5207 .sr(1)
5208 .m(2)
5209 .n(8)
5210 .k(k)
5211 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08005212 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005213 }
5214 }
5215
Frank Barcharde22685a2021-11-12 11:36:58 -08005216 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_gt_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07005217 TEST_REQUIRES_ARM_NEON_V8;
5218 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005219 for (uint32_t n = 1; n <= 8; n++) {
5220 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07005221 GemmMicrokernelTester()
5222 .mr(2)
5223 .nr(8)
5224 .kr(2)
5225 .sr(1)
5226 .m(m)
5227 .n(n)
5228 .k(k)
5229 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005230 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005231 }
5232 }
5233 }
5234 }
5235
Frank Barcharde22685a2021-11-12 11:36:58 -08005236 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_div_16) {
Frank Barchard287952a2021-11-03 15:26:45 -07005237 TEST_REQUIRES_ARM_NEON_V8;
5238 for (size_t k = 32; k <= 160; k += 16) {
5239 GemmMicrokernelTester()
5240 .mr(2)
5241 .nr(8)
5242 .kr(2)
5243 .sr(1)
5244 .m(2)
5245 .n(8)
5246 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005247 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005248 }
5249 }
5250
Frank Barcharde22685a2021-11-12 11:36:58 -08005251 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_div_16_strided_a) {
Frank Barchard287952a2021-11-03 15:26:45 -07005252 TEST_REQUIRES_ARM_NEON_V8;
5253 for (size_t k = 32; k <= 160; k += 16) {
5254 GemmMicrokernelTester()
5255 .mr(2)
5256 .nr(8)
5257 .kr(2)
5258 .sr(1)
5259 .m(2)
5260 .n(8)
5261 .k(k)
5262 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08005263 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005264 }
5265 }
5266
Frank Barcharde22685a2021-11-12 11:36:58 -08005267 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_div_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07005268 TEST_REQUIRES_ARM_NEON_V8;
5269 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005270 for (uint32_t n = 1; n <= 8; n++) {
5271 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07005272 GemmMicrokernelTester()
5273 .mr(2)
5274 .nr(8)
5275 .kr(2)
5276 .sr(1)
5277 .m(m)
5278 .n(n)
5279 .k(k)
5280 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005281 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005282 }
5283 }
5284 }
5285 }
5286
Frank Barcharde22685a2021-11-12 11:36:58 -08005287 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_gt_8) {
Frank Barchard287952a2021-11-03 15:26:45 -07005288 TEST_REQUIRES_ARM_NEON_V8;
5289 for (uint32_t n = 9; n < 16; n++) {
5290 for (size_t k = 1; k <= 80; k += 17) {
5291 GemmMicrokernelTester()
5292 .mr(2)
5293 .nr(8)
5294 .kr(2)
5295 .sr(1)
5296 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005297 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -07005298 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005299 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005300 }
5301 }
5302 }
5303
Frank Barcharde22685a2021-11-12 11:36:58 -08005304 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_gt_8_strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -07005305 TEST_REQUIRES_ARM_NEON_V8;
5306 for (uint32_t n = 9; n < 16; n++) {
5307 for (size_t k = 1; k <= 80; k += 17) {
5308 GemmMicrokernelTester()
5309 .mr(2)
5310 .nr(8)
5311 .kr(2)
5312 .sr(1)
5313 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005314 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -07005315 .k(k)
5316 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005317 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005318 }
5319 }
5320 }
5321
Frank Barcharde22685a2021-11-12 11:36:58 -08005322 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_gt_8_strided_a) {
Frank Barchard287952a2021-11-03 15:26:45 -07005323 TEST_REQUIRES_ARM_NEON_V8;
5324 for (uint32_t n = 9; n < 16; n++) {
5325 for (size_t k = 1; k <= 80; k += 17) {
5326 GemmMicrokernelTester()
5327 .mr(2)
5328 .nr(8)
5329 .kr(2)
5330 .sr(1)
5331 .m(2)
5332 .n(n)
5333 .k(k)
5334 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08005335 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005336 }
5337 }
5338 }
5339
Frank Barcharde22685a2021-11-12 11:36:58 -08005340 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_gt_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07005341 TEST_REQUIRES_ARM_NEON_V8;
5342 for (uint32_t n = 9; n < 16; n++) {
5343 for (size_t k = 1; k <= 80; k += 17) {
5344 for (uint32_t m = 1; m <= 2; m++) {
5345 GemmMicrokernelTester()
5346 .mr(2)
5347 .nr(8)
5348 .kr(2)
5349 .sr(1)
5350 .m(m)
5351 .n(n)
5352 .k(k)
5353 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005354 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005355 }
5356 }
5357 }
5358 }
5359
Frank Barcharde22685a2021-11-12 11:36:58 -08005360 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_div_8) {
Frank Barchard287952a2021-11-03 15:26:45 -07005361 TEST_REQUIRES_ARM_NEON_V8;
5362 for (uint32_t n = 16; n <= 24; n += 8) {
5363 for (size_t k = 1; k <= 80; k += 17) {
5364 GemmMicrokernelTester()
5365 .mr(2)
5366 .nr(8)
5367 .kr(2)
5368 .sr(1)
5369 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005370 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -07005371 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005372 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005373 }
5374 }
5375 }
5376
Frank Barcharde22685a2021-11-12 11:36:58 -08005377 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_div_8_strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -07005378 TEST_REQUIRES_ARM_NEON_V8;
5379 for (uint32_t n = 16; n <= 24; n += 8) {
5380 for (size_t k = 1; k <= 80; k += 17) {
5381 GemmMicrokernelTester()
5382 .mr(2)
5383 .nr(8)
5384 .kr(2)
5385 .sr(1)
5386 .m(2)
5387 .n(n)
5388 .k(k)
5389 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005390 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005391 }
5392 }
5393 }
5394
Frank Barcharde22685a2021-11-12 11:36:58 -08005395 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_div_8_strided_a) {
Frank Barchard287952a2021-11-03 15:26:45 -07005396 TEST_REQUIRES_ARM_NEON_V8;
5397 for (uint32_t n = 16; n <= 24; n += 8) {
5398 for (size_t k = 1; k <= 80; k += 17) {
5399 GemmMicrokernelTester()
5400 .mr(2)
5401 .nr(8)
5402 .kr(2)
5403 .sr(1)
5404 .m(2)
5405 .n(n)
5406 .k(k)
5407 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08005408 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005409 }
5410 }
5411 }
5412
Frank Barcharde22685a2021-11-12 11:36:58 -08005413 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_div_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07005414 TEST_REQUIRES_ARM_NEON_V8;
5415 for (uint32_t n = 16; n <= 24; n += 8) {
5416 for (size_t k = 1; k <= 80; k += 17) {
5417 for (uint32_t m = 1; m <= 2; m++) {
5418 GemmMicrokernelTester()
5419 .mr(2)
5420 .nr(8)
5421 .kr(2)
5422 .sr(1)
5423 .m(m)
5424 .n(n)
5425 .k(k)
5426 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005427 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005428 }
5429 }
5430 }
5431 }
5432
Frank Barcharde22685a2021-11-12 11:36:58 -08005433 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, strided_cm_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07005434 TEST_REQUIRES_ARM_NEON_V8;
5435 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005436 for (uint32_t n = 1; n <= 8; n++) {
5437 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07005438 GemmMicrokernelTester()
5439 .mr(2)
5440 .nr(8)
5441 .kr(2)
5442 .sr(1)
5443 .m(m)
5444 .n(n)
5445 .k(k)
5446 .cm_stride(11)
5447 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005448 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005449 }
5450 }
5451 }
5452 }
5453
Frank Barcharde22685a2021-11-12 11:36:58 -08005454 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, qmin) {
Frank Barchard287952a2021-11-03 15:26:45 -07005455 TEST_REQUIRES_ARM_NEON_V8;
5456 GemmMicrokernelTester()
5457 .mr(2)
5458 .nr(8)
5459 .kr(2)
5460 .sr(1)
5461 .m(2)
5462 .n(8)
5463 .k(16)
5464 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005465 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005466 }
5467
Frank Barcharde22685a2021-11-12 11:36:58 -08005468 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, qmax) {
Frank Barchard287952a2021-11-03 15:26:45 -07005469 TEST_REQUIRES_ARM_NEON_V8;
5470 GemmMicrokernelTester()
5471 .mr(2)
5472 .nr(8)
5473 .kr(2)
5474 .sr(1)
5475 .m(2)
5476 .n(8)
5477 .k(16)
5478 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005479 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005480 }
5481
Frank Barcharde22685a2021-11-12 11:36:58 -08005482 TEST(QS8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, strided_cm) {
Frank Barchard287952a2021-11-03 15:26:45 -07005483 TEST_REQUIRES_ARM_NEON_V8;
5484 GemmMicrokernelTester()
5485 .mr(2)
5486 .nr(8)
5487 .kr(2)
5488 .sr(1)
5489 .m(2)
5490 .n(8)
5491 .k(16)
5492 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005493 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard287952a2021-11-03 15:26:45 -07005494 }
5495#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5496
5497
Frank Barcharde4d3f762021-12-23 15:31:43 -08005498#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barcharde22685a2021-11-12 11:36:58 -08005499 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16) {
Frank Barchard960ae342021-07-01 11:31:11 -07005500 TEST_REQUIRES_ARM_NEON;
5501 GemmMicrokernelTester()
5502 .mr(2)
5503 .nr(8)
5504 .kr(8)
5505 .sr(1)
5506 .m(2)
5507 .n(8)
5508 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08005509 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005510 }
5511
Frank Barcharde22685a2021-11-12 11:36:58 -08005512 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cn) {
Frank Barchard960ae342021-07-01 11:31:11 -07005513 TEST_REQUIRES_ARM_NEON;
5514 GemmMicrokernelTester()
5515 .mr(2)
5516 .nr(8)
5517 .kr(8)
5518 .sr(1)
5519 .m(2)
5520 .n(8)
5521 .k(16)
5522 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005523 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005524 }
5525
Frank Barcharde22685a2021-11-12 11:36:58 -08005526 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_strided_a) {
Frank Barchard960ae342021-07-01 11:31:11 -07005527 TEST_REQUIRES_ARM_NEON;
5528 GemmMicrokernelTester()
5529 .mr(2)
5530 .nr(8)
5531 .kr(8)
5532 .sr(1)
5533 .m(2)
5534 .n(8)
5535 .k(16)
5536 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005537 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005538 }
5539
Frank Barcharde22685a2021-11-12 11:36:58 -08005540 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -07005541 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005542 for (uint32_t n = 1; n <= 8; n++) {
5543 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard960ae342021-07-01 11:31:11 -07005544 GemmMicrokernelTester()
5545 .mr(2)
5546 .nr(8)
5547 .kr(8)
5548 .sr(1)
5549 .m(m)
5550 .n(n)
5551 .k(16)
5552 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005553 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005554 }
5555 }
5556 }
5557
Frank Barcharde22685a2021-11-12 11:36:58 -08005558 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_m) {
Frank Barchard960ae342021-07-01 11:31:11 -07005559 TEST_REQUIRES_ARM_NEON;
5560 for (uint32_t m = 1; m <= 2; m++) {
5561 GemmMicrokernelTester()
5562 .mr(2)
5563 .nr(8)
5564 .kr(8)
5565 .sr(1)
5566 .m(m)
5567 .n(8)
5568 .k(16)
5569 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005570 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005571 }
5572 }
5573
Frank Barcharde22685a2021-11-12 11:36:58 -08005574 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_n) {
Frank Barchard960ae342021-07-01 11:31:11 -07005575 TEST_REQUIRES_ARM_NEON;
5576 for (uint32_t n = 1; n <= 8; n++) {
5577 GemmMicrokernelTester()
5578 .mr(2)
5579 .nr(8)
5580 .kr(8)
5581 .sr(1)
5582 .m(2)
5583 .n(n)
5584 .k(16)
5585 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005586 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005587 }
5588 }
5589
Frank Barcharde22685a2021-11-12 11:36:58 -08005590 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16) {
Frank Barchard960ae342021-07-01 11:31:11 -07005591 TEST_REQUIRES_ARM_NEON;
5592 for (size_t k = 1; k < 16; k++) {
5593 GemmMicrokernelTester()
5594 .mr(2)
5595 .nr(8)
5596 .kr(8)
5597 .sr(1)
5598 .m(2)
5599 .n(8)
5600 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005601 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005602 }
5603 }
5604
Frank Barcharde22685a2021-11-12 11:36:58 -08005605 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16_strided_a) {
Frank Barchard960ae342021-07-01 11:31:11 -07005606 TEST_REQUIRES_ARM_NEON;
5607 for (size_t k = 1; k < 16; k++) {
5608 GemmMicrokernelTester()
5609 .mr(2)
5610 .nr(8)
5611 .kr(8)
5612 .sr(1)
5613 .m(2)
5614 .n(8)
5615 .k(k)
5616 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005617 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005618 }
5619 }
5620
Frank Barcharde22685a2021-11-12 11:36:58 -08005621 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -07005622 TEST_REQUIRES_ARM_NEON;
5623 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005624 for (uint32_t n = 1; n <= 8; n++) {
5625 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard960ae342021-07-01 11:31:11 -07005626 GemmMicrokernelTester()
5627 .mr(2)
5628 .nr(8)
5629 .kr(8)
5630 .sr(1)
5631 .m(m)
5632 .n(n)
5633 .k(k)
5634 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005635 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005636 }
5637 }
5638 }
5639 }
5640
Frank Barcharde22685a2021-11-12 11:36:58 -08005641 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16) {
Frank Barchard960ae342021-07-01 11:31:11 -07005642 TEST_REQUIRES_ARM_NEON;
5643 for (size_t k = 17; k < 32; k++) {
5644 GemmMicrokernelTester()
5645 .mr(2)
5646 .nr(8)
5647 .kr(8)
5648 .sr(1)
5649 .m(2)
5650 .n(8)
5651 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005652 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005653 }
5654 }
5655
Frank Barcharde22685a2021-11-12 11:36:58 -08005656 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16_strided_a) {
Frank Barchard960ae342021-07-01 11:31:11 -07005657 TEST_REQUIRES_ARM_NEON;
5658 for (size_t k = 17; k < 32; k++) {
5659 GemmMicrokernelTester()
5660 .mr(2)
5661 .nr(8)
5662 .kr(8)
5663 .sr(1)
5664 .m(2)
5665 .n(8)
5666 .k(k)
5667 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08005668 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005669 }
5670 }
5671
Frank Barcharde22685a2021-11-12 11:36:58 -08005672 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -07005673 TEST_REQUIRES_ARM_NEON;
5674 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005675 for (uint32_t n = 1; n <= 8; n++) {
5676 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard960ae342021-07-01 11:31:11 -07005677 GemmMicrokernelTester()
5678 .mr(2)
5679 .nr(8)
5680 .kr(8)
5681 .sr(1)
5682 .m(m)
5683 .n(n)
5684 .k(k)
5685 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005686 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005687 }
5688 }
5689 }
5690 }
5691
Frank Barcharde22685a2021-11-12 11:36:58 -08005692 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16) {
Frank Barchard960ae342021-07-01 11:31:11 -07005693 TEST_REQUIRES_ARM_NEON;
5694 for (size_t k = 32; k <= 160; k += 16) {
5695 GemmMicrokernelTester()
5696 .mr(2)
5697 .nr(8)
5698 .kr(8)
5699 .sr(1)
5700 .m(2)
5701 .n(8)
5702 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005703 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005704 }
5705 }
5706
Frank Barcharde22685a2021-11-12 11:36:58 -08005707 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16_strided_a) {
Frank Barchard960ae342021-07-01 11:31:11 -07005708 TEST_REQUIRES_ARM_NEON;
5709 for (size_t k = 32; k <= 160; k += 16) {
5710 GemmMicrokernelTester()
5711 .mr(2)
5712 .nr(8)
5713 .kr(8)
5714 .sr(1)
5715 .m(2)
5716 .n(8)
5717 .k(k)
5718 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08005719 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005720 }
5721 }
5722
Frank Barcharde22685a2021-11-12 11:36:58 -08005723 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -07005724 TEST_REQUIRES_ARM_NEON;
5725 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005726 for (uint32_t n = 1; n <= 8; n++) {
5727 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard960ae342021-07-01 11:31:11 -07005728 GemmMicrokernelTester()
5729 .mr(2)
5730 .nr(8)
5731 .kr(8)
5732 .sr(1)
5733 .m(m)
5734 .n(n)
5735 .k(k)
5736 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005737 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005738 }
5739 }
5740 }
5741 }
5742
Frank Barcharde22685a2021-11-12 11:36:58 -08005743 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8) {
Frank Barchard960ae342021-07-01 11:31:11 -07005744 TEST_REQUIRES_ARM_NEON;
5745 for (uint32_t n = 9; n < 16; n++) {
5746 for (size_t k = 1; k <= 80; k += 17) {
5747 GemmMicrokernelTester()
5748 .mr(2)
5749 .nr(8)
5750 .kr(8)
5751 .sr(1)
5752 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005753 .n(n)
Frank Barchard960ae342021-07-01 11:31:11 -07005754 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005755 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005756 }
5757 }
5758 }
5759
Frank Barcharde22685a2021-11-12 11:36:58 -08005760 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_strided_cn) {
Frank Barchard960ae342021-07-01 11:31:11 -07005761 TEST_REQUIRES_ARM_NEON;
5762 for (uint32_t n = 9; n < 16; n++) {
5763 for (size_t k = 1; k <= 80; k += 17) {
5764 GemmMicrokernelTester()
5765 .mr(2)
5766 .nr(8)
5767 .kr(8)
5768 .sr(1)
5769 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005770 .n(n)
Frank Barchard960ae342021-07-01 11:31:11 -07005771 .k(k)
5772 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005773 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005774 }
5775 }
5776 }
5777
Frank Barcharde22685a2021-11-12 11:36:58 -08005778 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_strided_a) {
Frank Barchard960ae342021-07-01 11:31:11 -07005779 TEST_REQUIRES_ARM_NEON;
5780 for (uint32_t n = 9; n < 16; n++) {
5781 for (size_t k = 1; k <= 80; k += 17) {
5782 GemmMicrokernelTester()
5783 .mr(2)
5784 .nr(8)
5785 .kr(8)
5786 .sr(1)
5787 .m(2)
5788 .n(n)
5789 .k(k)
5790 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08005791 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005792 }
5793 }
5794 }
5795
Frank Barcharde22685a2021-11-12 11:36:58 -08005796 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -07005797 TEST_REQUIRES_ARM_NEON;
5798 for (uint32_t n = 9; n < 16; n++) {
5799 for (size_t k = 1; k <= 80; k += 17) {
5800 for (uint32_t m = 1; m <= 2; m++) {
5801 GemmMicrokernelTester()
5802 .mr(2)
5803 .nr(8)
5804 .kr(8)
5805 .sr(1)
5806 .m(m)
5807 .n(n)
5808 .k(k)
5809 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005810 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005811 }
5812 }
5813 }
5814 }
5815
Frank Barcharde22685a2021-11-12 11:36:58 -08005816 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8) {
Frank Barchard960ae342021-07-01 11:31:11 -07005817 TEST_REQUIRES_ARM_NEON;
5818 for (uint32_t n = 16; n <= 24; n += 8) {
5819 for (size_t k = 1; k <= 80; k += 17) {
5820 GemmMicrokernelTester()
5821 .mr(2)
5822 .nr(8)
5823 .kr(8)
5824 .sr(1)
5825 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005826 .n(n)
Frank Barchard960ae342021-07-01 11:31:11 -07005827 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005828 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005829 }
5830 }
5831 }
5832
Frank Barcharde22685a2021-11-12 11:36:58 -08005833 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_strided_cn) {
Frank Barchard960ae342021-07-01 11:31:11 -07005834 TEST_REQUIRES_ARM_NEON;
5835 for (uint32_t n = 16; n <= 24; n += 8) {
5836 for (size_t k = 1; k <= 80; k += 17) {
5837 GemmMicrokernelTester()
5838 .mr(2)
5839 .nr(8)
5840 .kr(8)
5841 .sr(1)
5842 .m(2)
5843 .n(n)
5844 .k(k)
5845 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005846 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005847 }
5848 }
5849 }
5850
Frank Barcharde22685a2021-11-12 11:36:58 -08005851 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_strided_a) {
Frank Barchard960ae342021-07-01 11:31:11 -07005852 TEST_REQUIRES_ARM_NEON;
5853 for (uint32_t n = 16; n <= 24; n += 8) {
5854 for (size_t k = 1; k <= 80; k += 17) {
5855 GemmMicrokernelTester()
5856 .mr(2)
5857 .nr(8)
5858 .kr(8)
5859 .sr(1)
5860 .m(2)
5861 .n(n)
5862 .k(k)
5863 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08005864 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005865 }
5866 }
5867 }
5868
Frank Barcharde22685a2021-11-12 11:36:58 -08005869 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -07005870 TEST_REQUIRES_ARM_NEON;
5871 for (uint32_t n = 16; n <= 24; n += 8) {
5872 for (size_t k = 1; k <= 80; k += 17) {
5873 for (uint32_t m = 1; m <= 2; m++) {
5874 GemmMicrokernelTester()
5875 .mr(2)
5876 .nr(8)
5877 .kr(8)
5878 .sr(1)
5879 .m(m)
5880 .n(n)
5881 .k(k)
5882 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005883 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005884 }
5885 }
5886 }
5887 }
5888
Frank Barcharde22685a2021-11-12 11:36:58 -08005889 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -07005890 TEST_REQUIRES_ARM_NEON;
5891 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005892 for (uint32_t n = 1; n <= 8; n++) {
5893 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard960ae342021-07-01 11:31:11 -07005894 GemmMicrokernelTester()
5895 .mr(2)
5896 .nr(8)
5897 .kr(8)
5898 .sr(1)
5899 .m(m)
5900 .n(n)
5901 .k(k)
5902 .cm_stride(11)
5903 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005904 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005905 }
5906 }
5907 }
5908 }
5909
Frank Barcharde22685a2021-11-12 11:36:58 -08005910 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmin) {
Frank Barchard960ae342021-07-01 11:31:11 -07005911 TEST_REQUIRES_ARM_NEON;
5912 GemmMicrokernelTester()
5913 .mr(2)
5914 .nr(8)
5915 .kr(8)
5916 .sr(1)
5917 .m(2)
5918 .n(8)
5919 .k(16)
5920 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005921 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005922 }
5923
Frank Barcharde22685a2021-11-12 11:36:58 -08005924 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmax) {
Frank Barchard960ae342021-07-01 11:31:11 -07005925 TEST_REQUIRES_ARM_NEON;
5926 GemmMicrokernelTester()
5927 .mr(2)
5928 .nr(8)
5929 .kr(8)
5930 .sr(1)
5931 .m(2)
5932 .n(8)
5933 .k(16)
5934 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005935 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005936 }
5937
Frank Barcharde22685a2021-11-12 11:36:58 -08005938 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm) {
Frank Barchard960ae342021-07-01 11:31:11 -07005939 TEST_REQUIRES_ARM_NEON;
5940 GemmMicrokernelTester()
5941 .mr(2)
5942 .nr(8)
5943 .kr(8)
5944 .sr(1)
5945 .m(2)
5946 .n(8)
5947 .k(16)
5948 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005949 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005950 }
Frank Barcharde4d3f762021-12-23 15:31:43 -08005951#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard960ae342021-07-01 11:31:11 -07005952
5953
Frank Barcharde4d3f762021-12-23 15:31:43 -08005954#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barcharde22685a2021-11-12 11:36:58 -08005955 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16) {
Frank Barchard960ae342021-07-01 11:31:11 -07005956 TEST_REQUIRES_ARM_NEON;
5957 GemmMicrokernelTester()
5958 .mr(2)
5959 .nr(8)
5960 .kr(8)
5961 .sr(1)
5962 .m(2)
5963 .n(8)
5964 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08005965 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005966 }
5967
Frank Barcharde22685a2021-11-12 11:36:58 -08005968 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cn) {
Frank Barchard960ae342021-07-01 11:31:11 -07005969 TEST_REQUIRES_ARM_NEON;
5970 GemmMicrokernelTester()
5971 .mr(2)
5972 .nr(8)
5973 .kr(8)
5974 .sr(1)
5975 .m(2)
5976 .n(8)
5977 .k(16)
5978 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005979 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005980 }
5981
Frank Barcharde22685a2021-11-12 11:36:58 -08005982 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_strided_a) {
Frank Barchard960ae342021-07-01 11:31:11 -07005983 TEST_REQUIRES_ARM_NEON;
5984 GemmMicrokernelTester()
5985 .mr(2)
5986 .nr(8)
5987 .kr(8)
5988 .sr(1)
5989 .m(2)
5990 .n(8)
5991 .k(16)
5992 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005993 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07005994 }
5995
Frank Barcharde22685a2021-11-12 11:36:58 -08005996 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -07005997 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005998 for (uint32_t n = 1; n <= 8; n++) {
5999 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard960ae342021-07-01 11:31:11 -07006000 GemmMicrokernelTester()
6001 .mr(2)
6002 .nr(8)
6003 .kr(8)
6004 .sr(1)
6005 .m(m)
6006 .n(n)
6007 .k(16)
6008 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006009 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006010 }
6011 }
6012 }
6013
Frank Barcharde22685a2021-11-12 11:36:58 -08006014 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile_m) {
Frank Barchard960ae342021-07-01 11:31:11 -07006015 TEST_REQUIRES_ARM_NEON;
6016 for (uint32_t m = 1; m <= 2; m++) {
6017 GemmMicrokernelTester()
6018 .mr(2)
6019 .nr(8)
6020 .kr(8)
6021 .sr(1)
6022 .m(m)
6023 .n(8)
6024 .k(16)
6025 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006026 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006027 }
6028 }
6029
Frank Barcharde22685a2021-11-12 11:36:58 -08006030 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile_n) {
Frank Barchard960ae342021-07-01 11:31:11 -07006031 TEST_REQUIRES_ARM_NEON;
6032 for (uint32_t n = 1; n <= 8; n++) {
6033 GemmMicrokernelTester()
6034 .mr(2)
6035 .nr(8)
6036 .kr(8)
6037 .sr(1)
6038 .m(2)
6039 .n(n)
6040 .k(16)
6041 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006042 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006043 }
6044 }
6045
Frank Barcharde22685a2021-11-12 11:36:58 -08006046 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_lt_16) {
Frank Barchard960ae342021-07-01 11:31:11 -07006047 TEST_REQUIRES_ARM_NEON;
6048 for (size_t k = 1; k < 16; k++) {
6049 GemmMicrokernelTester()
6050 .mr(2)
6051 .nr(8)
6052 .kr(8)
6053 .sr(1)
6054 .m(2)
6055 .n(8)
6056 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006057 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006058 }
6059 }
6060
Frank Barcharde22685a2021-11-12 11:36:58 -08006061 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_lt_16_strided_a) {
Frank Barchard960ae342021-07-01 11:31:11 -07006062 TEST_REQUIRES_ARM_NEON;
6063 for (size_t k = 1; k < 16; k++) {
6064 GemmMicrokernelTester()
6065 .mr(2)
6066 .nr(8)
6067 .kr(8)
6068 .sr(1)
6069 .m(2)
6070 .n(8)
6071 .k(k)
6072 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006073 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006074 }
6075 }
6076
Frank Barcharde22685a2021-11-12 11:36:58 -08006077 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_lt_16_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -07006078 TEST_REQUIRES_ARM_NEON;
6079 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006080 for (uint32_t n = 1; n <= 8; n++) {
6081 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard960ae342021-07-01 11:31:11 -07006082 GemmMicrokernelTester()
6083 .mr(2)
6084 .nr(8)
6085 .kr(8)
6086 .sr(1)
6087 .m(m)
6088 .n(n)
6089 .k(k)
6090 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006091 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006092 }
6093 }
6094 }
6095 }
6096
Frank Barcharde22685a2021-11-12 11:36:58 -08006097 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_gt_16) {
Frank Barchard960ae342021-07-01 11:31:11 -07006098 TEST_REQUIRES_ARM_NEON;
6099 for (size_t k = 17; k < 32; k++) {
6100 GemmMicrokernelTester()
6101 .mr(2)
6102 .nr(8)
6103 .kr(8)
6104 .sr(1)
6105 .m(2)
6106 .n(8)
6107 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006108 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006109 }
6110 }
6111
Frank Barcharde22685a2021-11-12 11:36:58 -08006112 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_gt_16_strided_a) {
Frank Barchard960ae342021-07-01 11:31:11 -07006113 TEST_REQUIRES_ARM_NEON;
6114 for (size_t k = 17; k < 32; k++) {
6115 GemmMicrokernelTester()
6116 .mr(2)
6117 .nr(8)
6118 .kr(8)
6119 .sr(1)
6120 .m(2)
6121 .n(8)
6122 .k(k)
6123 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08006124 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006125 }
6126 }
6127
Frank Barcharde22685a2021-11-12 11:36:58 -08006128 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_gt_16_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -07006129 TEST_REQUIRES_ARM_NEON;
6130 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006131 for (uint32_t n = 1; n <= 8; n++) {
6132 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard960ae342021-07-01 11:31:11 -07006133 GemmMicrokernelTester()
6134 .mr(2)
6135 .nr(8)
6136 .kr(8)
6137 .sr(1)
6138 .m(m)
6139 .n(n)
6140 .k(k)
6141 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006142 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006143 }
6144 }
6145 }
6146 }
6147
Frank Barcharde22685a2021-11-12 11:36:58 -08006148 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_div_16) {
Frank Barchard960ae342021-07-01 11:31:11 -07006149 TEST_REQUIRES_ARM_NEON;
6150 for (size_t k = 32; k <= 160; k += 16) {
6151 GemmMicrokernelTester()
6152 .mr(2)
6153 .nr(8)
6154 .kr(8)
6155 .sr(1)
6156 .m(2)
6157 .n(8)
6158 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006159 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006160 }
6161 }
6162
Frank Barcharde22685a2021-11-12 11:36:58 -08006163 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_div_16_strided_a) {
Frank Barchard960ae342021-07-01 11:31:11 -07006164 TEST_REQUIRES_ARM_NEON;
6165 for (size_t k = 32; k <= 160; k += 16) {
6166 GemmMicrokernelTester()
6167 .mr(2)
6168 .nr(8)
6169 .kr(8)
6170 .sr(1)
6171 .m(2)
6172 .n(8)
6173 .k(k)
6174 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08006175 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006176 }
6177 }
6178
Frank Barcharde22685a2021-11-12 11:36:58 -08006179 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_div_16_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -07006180 TEST_REQUIRES_ARM_NEON;
6181 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006182 for (uint32_t n = 1; n <= 8; n++) {
6183 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard960ae342021-07-01 11:31:11 -07006184 GemmMicrokernelTester()
6185 .mr(2)
6186 .nr(8)
6187 .kr(8)
6188 .sr(1)
6189 .m(m)
6190 .n(n)
6191 .k(k)
6192 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006193 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006194 }
6195 }
6196 }
6197 }
6198
Frank Barcharde22685a2021-11-12 11:36:58 -08006199 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8) {
Frank Barchard960ae342021-07-01 11:31:11 -07006200 TEST_REQUIRES_ARM_NEON;
6201 for (uint32_t n = 9; n < 16; n++) {
6202 for (size_t k = 1; k <= 80; k += 17) {
6203 GemmMicrokernelTester()
6204 .mr(2)
6205 .nr(8)
6206 .kr(8)
6207 .sr(1)
6208 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006209 .n(n)
Frank Barchard960ae342021-07-01 11:31:11 -07006210 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006211 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006212 }
6213 }
6214 }
6215
Frank Barcharde22685a2021-11-12 11:36:58 -08006216 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
Frank Barchard960ae342021-07-01 11:31:11 -07006217 TEST_REQUIRES_ARM_NEON;
6218 for (uint32_t n = 9; n < 16; n++) {
6219 for (size_t k = 1; k <= 80; k += 17) {
6220 GemmMicrokernelTester()
6221 .mr(2)
6222 .nr(8)
6223 .kr(8)
6224 .sr(1)
6225 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006226 .n(n)
Frank Barchard960ae342021-07-01 11:31:11 -07006227 .k(k)
6228 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006229 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006230 }
6231 }
6232 }
6233
Frank Barcharde22685a2021-11-12 11:36:58 -08006234 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_strided_a) {
Frank Barchard960ae342021-07-01 11:31:11 -07006235 TEST_REQUIRES_ARM_NEON;
6236 for (uint32_t n = 9; n < 16; n++) {
6237 for (size_t k = 1; k <= 80; k += 17) {
6238 GemmMicrokernelTester()
6239 .mr(2)
6240 .nr(8)
6241 .kr(8)
6242 .sr(1)
6243 .m(2)
6244 .n(n)
6245 .k(k)
6246 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08006247 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006248 }
6249 }
6250 }
6251
Frank Barcharde22685a2021-11-12 11:36:58 -08006252 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -07006253 TEST_REQUIRES_ARM_NEON;
6254 for (uint32_t n = 9; n < 16; n++) {
6255 for (size_t k = 1; k <= 80; k += 17) {
6256 for (uint32_t m = 1; m <= 2; m++) {
6257 GemmMicrokernelTester()
6258 .mr(2)
6259 .nr(8)
6260 .kr(8)
6261 .sr(1)
6262 .m(m)
6263 .n(n)
6264 .k(k)
6265 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006266 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006267 }
6268 }
6269 }
6270 }
6271
Frank Barcharde22685a2021-11-12 11:36:58 -08006272 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8) {
Frank Barchard960ae342021-07-01 11:31:11 -07006273 TEST_REQUIRES_ARM_NEON;
6274 for (uint32_t n = 16; n <= 24; n += 8) {
6275 for (size_t k = 1; k <= 80; k += 17) {
6276 GemmMicrokernelTester()
6277 .mr(2)
6278 .nr(8)
6279 .kr(8)
6280 .sr(1)
6281 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006282 .n(n)
Frank Barchard960ae342021-07-01 11:31:11 -07006283 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006284 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006285 }
6286 }
6287 }
6288
Frank Barcharde22685a2021-11-12 11:36:58 -08006289 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_strided_cn) {
Frank Barchard960ae342021-07-01 11:31:11 -07006290 TEST_REQUIRES_ARM_NEON;
6291 for (uint32_t n = 16; n <= 24; n += 8) {
6292 for (size_t k = 1; k <= 80; k += 17) {
6293 GemmMicrokernelTester()
6294 .mr(2)
6295 .nr(8)
6296 .kr(8)
6297 .sr(1)
6298 .m(2)
6299 .n(n)
6300 .k(k)
6301 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006302 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006303 }
6304 }
6305 }
6306
Frank Barcharde22685a2021-11-12 11:36:58 -08006307 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_strided_a) {
Frank Barchard960ae342021-07-01 11:31:11 -07006308 TEST_REQUIRES_ARM_NEON;
6309 for (uint32_t n = 16; n <= 24; n += 8) {
6310 for (size_t k = 1; k <= 80; k += 17) {
6311 GemmMicrokernelTester()
6312 .mr(2)
6313 .nr(8)
6314 .kr(8)
6315 .sr(1)
6316 .m(2)
6317 .n(n)
6318 .k(k)
6319 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08006320 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006321 }
6322 }
6323 }
6324
Frank Barcharde22685a2021-11-12 11:36:58 -08006325 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -07006326 TEST_REQUIRES_ARM_NEON;
6327 for (uint32_t n = 16; n <= 24; n += 8) {
6328 for (size_t k = 1; k <= 80; k += 17) {
6329 for (uint32_t m = 1; m <= 2; m++) {
6330 GemmMicrokernelTester()
6331 .mr(2)
6332 .nr(8)
6333 .kr(8)
6334 .sr(1)
6335 .m(m)
6336 .n(n)
6337 .k(k)
6338 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006339 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006340 }
6341 }
6342 }
6343 }
6344
Frank Barcharde22685a2021-11-12 11:36:58 -08006345 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cm_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -07006346 TEST_REQUIRES_ARM_NEON;
6347 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006348 for (uint32_t n = 1; n <= 8; n++) {
6349 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard960ae342021-07-01 11:31:11 -07006350 GemmMicrokernelTester()
6351 .mr(2)
6352 .nr(8)
6353 .kr(8)
6354 .sr(1)
6355 .m(m)
6356 .n(n)
6357 .k(k)
6358 .cm_stride(11)
6359 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006360 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006361 }
6362 }
6363 }
6364 }
6365
Frank Barcharde22685a2021-11-12 11:36:58 -08006366 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, qmin) {
Frank Barchard960ae342021-07-01 11:31:11 -07006367 TEST_REQUIRES_ARM_NEON;
6368 GemmMicrokernelTester()
6369 .mr(2)
6370 .nr(8)
6371 .kr(8)
6372 .sr(1)
6373 .m(2)
6374 .n(8)
6375 .k(16)
6376 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006377 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006378 }
6379
Frank Barcharde22685a2021-11-12 11:36:58 -08006380 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, qmax) {
Frank Barchard960ae342021-07-01 11:31:11 -07006381 TEST_REQUIRES_ARM_NEON;
6382 GemmMicrokernelTester()
6383 .mr(2)
6384 .nr(8)
6385 .kr(8)
6386 .sr(1)
6387 .m(2)
6388 .n(8)
6389 .k(16)
6390 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006391 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006392 }
6393
Frank Barcharde22685a2021-11-12 11:36:58 -08006394 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cm) {
Frank Barchard960ae342021-07-01 11:31:11 -07006395 TEST_REQUIRES_ARM_NEON;
6396 GemmMicrokernelTester()
6397 .mr(2)
6398 .nr(8)
6399 .kr(8)
6400 .sr(1)
6401 .m(2)
6402 .n(8)
6403 .k(16)
6404 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006405 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006406 }
Frank Barcharde4d3f762021-12-23 15:31:43 -08006407#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard960ae342021-07-01 11:31:11 -07006408
6409
Frank Barcharde4d3f762021-12-23 15:31:43 -08006410#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barcharde22685a2021-11-12 11:36:58 -08006411 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16) {
Frank Barchard960ae342021-07-01 11:31:11 -07006412 TEST_REQUIRES_ARM_NEON;
6413 GemmMicrokernelTester()
6414 .mr(1)
6415 .nr(8)
6416 .kr(8)
6417 .sr(1)
6418 .m(1)
6419 .n(8)
6420 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08006421 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006422 }
6423
Frank Barcharde22685a2021-11-12 11:36:58 -08006424 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cn) {
Frank Barchard960ae342021-07-01 11:31:11 -07006425 TEST_REQUIRES_ARM_NEON;
6426 GemmMicrokernelTester()
6427 .mr(1)
6428 .nr(8)
6429 .kr(8)
6430 .sr(1)
6431 .m(1)
6432 .n(8)
6433 .k(16)
6434 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006435 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006436 }
6437
Frank Barcharde22685a2021-11-12 11:36:58 -08006438 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_strided_a) {
Frank Barchard960ae342021-07-01 11:31:11 -07006439 TEST_REQUIRES_ARM_NEON;
6440 GemmMicrokernelTester()
6441 .mr(1)
6442 .nr(8)
6443 .kr(8)
6444 .sr(1)
6445 .m(1)
6446 .n(8)
6447 .k(16)
6448 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006449 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006450 }
6451
Frank Barcharde22685a2021-11-12 11:36:58 -08006452 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -07006453 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006454 for (uint32_t n = 1; n <= 8; n++) {
6455 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard960ae342021-07-01 11:31:11 -07006456 GemmMicrokernelTester()
6457 .mr(1)
6458 .nr(8)
6459 .kr(8)
6460 .sr(1)
6461 .m(m)
6462 .n(n)
6463 .k(16)
6464 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006465 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006466 }
6467 }
6468 }
6469
Frank Barcharde22685a2021-11-12 11:36:58 -08006470 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile_m) {
Frank Barchard960ae342021-07-01 11:31:11 -07006471 TEST_REQUIRES_ARM_NEON;
6472 for (uint32_t m = 1; m <= 1; m++) {
6473 GemmMicrokernelTester()
6474 .mr(1)
6475 .nr(8)
6476 .kr(8)
6477 .sr(1)
6478 .m(m)
6479 .n(8)
6480 .k(16)
6481 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006482 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006483 }
6484 }
6485
Frank Barcharde22685a2021-11-12 11:36:58 -08006486 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile_n) {
Frank Barchard960ae342021-07-01 11:31:11 -07006487 TEST_REQUIRES_ARM_NEON;
6488 for (uint32_t n = 1; n <= 8; n++) {
6489 GemmMicrokernelTester()
6490 .mr(1)
6491 .nr(8)
6492 .kr(8)
6493 .sr(1)
6494 .m(1)
6495 .n(n)
6496 .k(16)
6497 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006498 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006499 }
6500 }
6501
Frank Barcharde22685a2021-11-12 11:36:58 -08006502 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_lt_16) {
Frank Barchard960ae342021-07-01 11:31:11 -07006503 TEST_REQUIRES_ARM_NEON;
6504 for (size_t k = 1; k < 16; k++) {
6505 GemmMicrokernelTester()
6506 .mr(1)
6507 .nr(8)
6508 .kr(8)
6509 .sr(1)
6510 .m(1)
6511 .n(8)
6512 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006513 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006514 }
6515 }
6516
Frank Barcharde22685a2021-11-12 11:36:58 -08006517 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_lt_16_strided_a) {
Frank Barchard960ae342021-07-01 11:31:11 -07006518 TEST_REQUIRES_ARM_NEON;
6519 for (size_t k = 1; k < 16; k++) {
6520 GemmMicrokernelTester()
6521 .mr(1)
6522 .nr(8)
6523 .kr(8)
6524 .sr(1)
6525 .m(1)
6526 .n(8)
6527 .k(k)
6528 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006529 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006530 }
6531 }
6532
Frank Barcharde22685a2021-11-12 11:36:58 -08006533 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_lt_16_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -07006534 TEST_REQUIRES_ARM_NEON;
6535 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006536 for (uint32_t n = 1; n <= 8; n++) {
6537 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard960ae342021-07-01 11:31:11 -07006538 GemmMicrokernelTester()
6539 .mr(1)
6540 .nr(8)
6541 .kr(8)
6542 .sr(1)
6543 .m(m)
6544 .n(n)
6545 .k(k)
6546 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006547 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006548 }
6549 }
6550 }
6551 }
6552
Frank Barcharde22685a2021-11-12 11:36:58 -08006553 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_gt_16) {
Frank Barchard960ae342021-07-01 11:31:11 -07006554 TEST_REQUIRES_ARM_NEON;
6555 for (size_t k = 17; k < 32; k++) {
6556 GemmMicrokernelTester()
6557 .mr(1)
6558 .nr(8)
6559 .kr(8)
6560 .sr(1)
6561 .m(1)
6562 .n(8)
6563 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006564 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006565 }
6566 }
6567
Frank Barcharde22685a2021-11-12 11:36:58 -08006568 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_gt_16_strided_a) {
Frank Barchard960ae342021-07-01 11:31:11 -07006569 TEST_REQUIRES_ARM_NEON;
6570 for (size_t k = 17; k < 32; k++) {
6571 GemmMicrokernelTester()
6572 .mr(1)
6573 .nr(8)
6574 .kr(8)
6575 .sr(1)
6576 .m(1)
6577 .n(8)
6578 .k(k)
6579 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08006580 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006581 }
6582 }
6583
Frank Barcharde22685a2021-11-12 11:36:58 -08006584 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_gt_16_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -07006585 TEST_REQUIRES_ARM_NEON;
6586 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006587 for (uint32_t n = 1; n <= 8; n++) {
6588 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard960ae342021-07-01 11:31:11 -07006589 GemmMicrokernelTester()
6590 .mr(1)
6591 .nr(8)
6592 .kr(8)
6593 .sr(1)
6594 .m(m)
6595 .n(n)
6596 .k(k)
6597 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006598 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006599 }
6600 }
6601 }
6602 }
6603
Frank Barcharde22685a2021-11-12 11:36:58 -08006604 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_div_16) {
Frank Barchard960ae342021-07-01 11:31:11 -07006605 TEST_REQUIRES_ARM_NEON;
6606 for (size_t k = 32; k <= 160; k += 16) {
6607 GemmMicrokernelTester()
6608 .mr(1)
6609 .nr(8)
6610 .kr(8)
6611 .sr(1)
6612 .m(1)
6613 .n(8)
6614 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006615 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006616 }
6617 }
6618
Frank Barcharde22685a2021-11-12 11:36:58 -08006619 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_div_16_strided_a) {
Frank Barchard960ae342021-07-01 11:31:11 -07006620 TEST_REQUIRES_ARM_NEON;
6621 for (size_t k = 32; k <= 160; k += 16) {
6622 GemmMicrokernelTester()
6623 .mr(1)
6624 .nr(8)
6625 .kr(8)
6626 .sr(1)
6627 .m(1)
6628 .n(8)
6629 .k(k)
6630 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08006631 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006632 }
6633 }
6634
Frank Barcharde22685a2021-11-12 11:36:58 -08006635 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_div_16_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -07006636 TEST_REQUIRES_ARM_NEON;
6637 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006638 for (uint32_t n = 1; n <= 8; n++) {
6639 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard960ae342021-07-01 11:31:11 -07006640 GemmMicrokernelTester()
6641 .mr(1)
6642 .nr(8)
6643 .kr(8)
6644 .sr(1)
6645 .m(m)
6646 .n(n)
6647 .k(k)
6648 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006649 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006650 }
6651 }
6652 }
6653 }
6654
Frank Barcharde22685a2021-11-12 11:36:58 -08006655 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8) {
Frank Barchard960ae342021-07-01 11:31:11 -07006656 TEST_REQUIRES_ARM_NEON;
6657 for (uint32_t n = 9; n < 16; n++) {
6658 for (size_t k = 1; k <= 80; k += 17) {
6659 GemmMicrokernelTester()
6660 .mr(1)
6661 .nr(8)
6662 .kr(8)
6663 .sr(1)
6664 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006665 .n(n)
Frank Barchard960ae342021-07-01 11:31:11 -07006666 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006667 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006668 }
6669 }
6670 }
6671
Frank Barcharde22685a2021-11-12 11:36:58 -08006672 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
Frank Barchard960ae342021-07-01 11:31:11 -07006673 TEST_REQUIRES_ARM_NEON;
6674 for (uint32_t n = 9; n < 16; n++) {
6675 for (size_t k = 1; k <= 80; k += 17) {
6676 GemmMicrokernelTester()
6677 .mr(1)
6678 .nr(8)
6679 .kr(8)
6680 .sr(1)
6681 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006682 .n(n)
Frank Barchard960ae342021-07-01 11:31:11 -07006683 .k(k)
6684 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006685 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006686 }
6687 }
6688 }
6689
Frank Barcharde22685a2021-11-12 11:36:58 -08006690 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_strided_a) {
Frank Barchard960ae342021-07-01 11:31:11 -07006691 TEST_REQUIRES_ARM_NEON;
6692 for (uint32_t n = 9; n < 16; n++) {
6693 for (size_t k = 1; k <= 80; k += 17) {
6694 GemmMicrokernelTester()
6695 .mr(1)
6696 .nr(8)
6697 .kr(8)
6698 .sr(1)
6699 .m(1)
6700 .n(n)
6701 .k(k)
6702 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08006703 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006704 }
6705 }
6706 }
6707
Frank Barcharde22685a2021-11-12 11:36:58 -08006708 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -07006709 TEST_REQUIRES_ARM_NEON;
6710 for (uint32_t n = 9; n < 16; n++) {
6711 for (size_t k = 1; k <= 80; k += 17) {
6712 for (uint32_t m = 1; m <= 1; m++) {
6713 GemmMicrokernelTester()
6714 .mr(1)
6715 .nr(8)
6716 .kr(8)
6717 .sr(1)
6718 .m(m)
6719 .n(n)
6720 .k(k)
6721 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006722 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006723 }
6724 }
6725 }
6726 }
6727
Frank Barcharde22685a2021-11-12 11:36:58 -08006728 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8) {
Frank Barchard960ae342021-07-01 11:31:11 -07006729 TEST_REQUIRES_ARM_NEON;
6730 for (uint32_t n = 16; n <= 24; n += 8) {
6731 for (size_t k = 1; k <= 80; k += 17) {
6732 GemmMicrokernelTester()
6733 .mr(1)
6734 .nr(8)
6735 .kr(8)
6736 .sr(1)
6737 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006738 .n(n)
Frank Barchard960ae342021-07-01 11:31:11 -07006739 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006740 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006741 }
6742 }
6743 }
6744
Frank Barcharde22685a2021-11-12 11:36:58 -08006745 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_strided_cn) {
Frank Barchard960ae342021-07-01 11:31:11 -07006746 TEST_REQUIRES_ARM_NEON;
6747 for (uint32_t n = 16; n <= 24; n += 8) {
6748 for (size_t k = 1; k <= 80; k += 17) {
6749 GemmMicrokernelTester()
6750 .mr(1)
6751 .nr(8)
6752 .kr(8)
6753 .sr(1)
6754 .m(1)
6755 .n(n)
6756 .k(k)
6757 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006758 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006759 }
6760 }
6761 }
6762
Frank Barcharde22685a2021-11-12 11:36:58 -08006763 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_strided_a) {
Frank Barchard960ae342021-07-01 11:31:11 -07006764 TEST_REQUIRES_ARM_NEON;
6765 for (uint32_t n = 16; n <= 24; n += 8) {
6766 for (size_t k = 1; k <= 80; k += 17) {
6767 GemmMicrokernelTester()
6768 .mr(1)
6769 .nr(8)
6770 .kr(8)
6771 .sr(1)
6772 .m(1)
6773 .n(n)
6774 .k(k)
6775 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08006776 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006777 }
6778 }
6779 }
6780
Frank Barcharde22685a2021-11-12 11:36:58 -08006781 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -07006782 TEST_REQUIRES_ARM_NEON;
6783 for (uint32_t n = 16; n <= 24; n += 8) {
6784 for (size_t k = 1; k <= 80; k += 17) {
6785 for (uint32_t m = 1; m <= 1; m++) {
6786 GemmMicrokernelTester()
6787 .mr(1)
6788 .nr(8)
6789 .kr(8)
6790 .sr(1)
6791 .m(m)
6792 .n(n)
6793 .k(k)
6794 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006795 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006796 }
6797 }
6798 }
6799 }
6800
Frank Barcharde22685a2021-11-12 11:36:58 -08006801 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cm_subtile) {
Frank Barchard960ae342021-07-01 11:31:11 -07006802 TEST_REQUIRES_ARM_NEON;
6803 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006804 for (uint32_t n = 1; n <= 8; n++) {
6805 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard960ae342021-07-01 11:31:11 -07006806 GemmMicrokernelTester()
6807 .mr(1)
6808 .nr(8)
6809 .kr(8)
6810 .sr(1)
6811 .m(m)
6812 .n(n)
6813 .k(k)
6814 .cm_stride(11)
6815 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006816 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006817 }
6818 }
6819 }
6820 }
6821
Frank Barcharde22685a2021-11-12 11:36:58 -08006822 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, qmin) {
Frank Barchard960ae342021-07-01 11:31:11 -07006823 TEST_REQUIRES_ARM_NEON;
6824 GemmMicrokernelTester()
6825 .mr(1)
6826 .nr(8)
6827 .kr(8)
6828 .sr(1)
6829 .m(1)
6830 .n(8)
6831 .k(16)
6832 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006833 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006834 }
6835
Frank Barcharde22685a2021-11-12 11:36:58 -08006836 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, qmax) {
Frank Barchard960ae342021-07-01 11:31:11 -07006837 TEST_REQUIRES_ARM_NEON;
6838 GemmMicrokernelTester()
6839 .mr(1)
6840 .nr(8)
6841 .kr(8)
6842 .sr(1)
6843 .m(1)
6844 .n(8)
6845 .k(16)
6846 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006847 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006848 }
6849
Frank Barcharde22685a2021-11-12 11:36:58 -08006850 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cm) {
Frank Barchard960ae342021-07-01 11:31:11 -07006851 TEST_REQUIRES_ARM_NEON;
6852 GemmMicrokernelTester()
6853 .mr(1)
6854 .nr(8)
6855 .kr(8)
6856 .sr(1)
6857 .m(1)
6858 .n(8)
6859 .k(16)
6860 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006861 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard960ae342021-07-01 11:31:11 -07006862 }
Frank Barcharde4d3f762021-12-23 15:31:43 -08006863#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard960ae342021-07-01 11:31:11 -07006864
6865
Frank Barcharde4d3f762021-12-23 15:31:43 -08006866#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard98af05c2021-06-30 12:15:04 -07006867 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8) {
6868 TEST_REQUIRES_ARM_NEON;
6869 GemmMicrokernelTester()
6870 .mr(4)
6871 .nr(16)
6872 .kr(1)
6873 .sr(1)
6874 .m(4)
6875 .n(16)
6876 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08006877 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -07006878 }
6879
6880 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cn) {
6881 TEST_REQUIRES_ARM_NEON;
6882 GemmMicrokernelTester()
6883 .mr(4)
6884 .nr(16)
6885 .kr(1)
6886 .sr(1)
6887 .m(4)
6888 .n(16)
6889 .k(8)
6890 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006891 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -07006892 }
6893
6894 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_strided_a) {
6895 TEST_REQUIRES_ARM_NEON;
6896 GemmMicrokernelTester()
6897 .mr(4)
6898 .nr(16)
6899 .kr(1)
6900 .sr(1)
6901 .m(4)
6902 .n(16)
6903 .k(8)
6904 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006905 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -07006906 }
6907
6908 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile) {
6909 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006910 for (uint32_t n = 1; n <= 16; n++) {
6911 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard98af05c2021-06-30 12:15:04 -07006912 GemmMicrokernelTester()
6913 .mr(4)
6914 .nr(16)
6915 .kr(1)
6916 .sr(1)
6917 .m(m)
6918 .n(n)
6919 .k(8)
6920 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006921 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -07006922 }
6923 }
6924 }
6925
6926 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_m) {
6927 TEST_REQUIRES_ARM_NEON;
6928 for (uint32_t m = 1; m <= 4; m++) {
6929 GemmMicrokernelTester()
6930 .mr(4)
6931 .nr(16)
6932 .kr(1)
6933 .sr(1)
6934 .m(m)
6935 .n(16)
6936 .k(8)
6937 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006938 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -07006939 }
6940 }
6941
6942 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_n) {
6943 TEST_REQUIRES_ARM_NEON;
6944 for (uint32_t n = 1; n <= 16; n++) {
6945 GemmMicrokernelTester()
6946 .mr(4)
6947 .nr(16)
6948 .kr(1)
6949 .sr(1)
6950 .m(4)
6951 .n(n)
6952 .k(8)
6953 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006954 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -07006955 }
6956 }
6957
6958 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8) {
6959 TEST_REQUIRES_ARM_NEON;
6960 for (size_t k = 1; k < 8; k++) {
6961 GemmMicrokernelTester()
6962 .mr(4)
6963 .nr(16)
6964 .kr(1)
6965 .sr(1)
6966 .m(4)
6967 .n(16)
6968 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006969 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -07006970 }
6971 }
6972
6973 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8_strided_a) {
6974 TEST_REQUIRES_ARM_NEON;
6975 for (size_t k = 1; k < 8; k++) {
6976 GemmMicrokernelTester()
6977 .mr(4)
6978 .nr(16)
6979 .kr(1)
6980 .sr(1)
6981 .m(4)
6982 .n(16)
6983 .k(k)
6984 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006985 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -07006986 }
6987 }
6988
6989 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8_subtile) {
6990 TEST_REQUIRES_ARM_NEON;
6991 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006992 for (uint32_t n = 1; n <= 16; n++) {
6993 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard98af05c2021-06-30 12:15:04 -07006994 GemmMicrokernelTester()
6995 .mr(4)
6996 .nr(16)
6997 .kr(1)
6998 .sr(1)
6999 .m(m)
7000 .n(n)
7001 .k(k)
7002 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007003 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -07007004 }
7005 }
7006 }
7007 }
7008
7009 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8) {
7010 TEST_REQUIRES_ARM_NEON;
7011 for (size_t k = 9; k < 16; k++) {
7012 GemmMicrokernelTester()
7013 .mr(4)
7014 .nr(16)
7015 .kr(1)
7016 .sr(1)
7017 .m(4)
7018 .n(16)
7019 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007020 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -07007021 }
7022 }
7023
7024 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8_strided_a) {
7025 TEST_REQUIRES_ARM_NEON;
7026 for (size_t k = 9; k < 16; k++) {
7027 GemmMicrokernelTester()
7028 .mr(4)
7029 .nr(16)
7030 .kr(1)
7031 .sr(1)
7032 .m(4)
7033 .n(16)
7034 .k(k)
7035 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007036 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -07007037 }
7038 }
7039
7040 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8_subtile) {
7041 TEST_REQUIRES_ARM_NEON;
7042 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007043 for (uint32_t n = 1; n <= 16; n++) {
7044 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard98af05c2021-06-30 12:15:04 -07007045 GemmMicrokernelTester()
7046 .mr(4)
7047 .nr(16)
7048 .kr(1)
7049 .sr(1)
7050 .m(m)
7051 .n(n)
7052 .k(k)
7053 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007054 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -07007055 }
7056 }
7057 }
7058 }
7059
7060 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8) {
7061 TEST_REQUIRES_ARM_NEON;
7062 for (size_t k = 16; k <= 80; k += 8) {
7063 GemmMicrokernelTester()
7064 .mr(4)
7065 .nr(16)
7066 .kr(1)
7067 .sr(1)
7068 .m(4)
7069 .n(16)
7070 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007071 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -07007072 }
7073 }
7074
7075 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8_strided_a) {
7076 TEST_REQUIRES_ARM_NEON;
7077 for (size_t k = 16; k <= 80; k += 8) {
7078 GemmMicrokernelTester()
7079 .mr(4)
7080 .nr(16)
7081 .kr(1)
7082 .sr(1)
7083 .m(4)
7084 .n(16)
7085 .k(k)
7086 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08007087 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -07007088 }
7089 }
7090
7091 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8_subtile) {
7092 TEST_REQUIRES_ARM_NEON;
7093 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007094 for (uint32_t n = 1; n <= 16; n++) {
7095 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard98af05c2021-06-30 12:15:04 -07007096 GemmMicrokernelTester()
7097 .mr(4)
7098 .nr(16)
7099 .kr(1)
7100 .sr(1)
7101 .m(m)
7102 .n(n)
7103 .k(k)
7104 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007105 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -07007106 }
7107 }
7108 }
7109 }
7110
7111 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16) {
7112 TEST_REQUIRES_ARM_NEON;
7113 for (uint32_t n = 17; n < 32; n++) {
7114 for (size_t k = 1; k <= 40; k += 9) {
7115 GemmMicrokernelTester()
7116 .mr(4)
7117 .nr(16)
7118 .kr(1)
7119 .sr(1)
7120 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007121 .n(n)
Frank Barchard98af05c2021-06-30 12:15:04 -07007122 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007123 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -07007124 }
7125 }
7126 }
7127
7128 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_strided_cn) {
7129 TEST_REQUIRES_ARM_NEON;
7130 for (uint32_t n = 17; n < 32; n++) {
7131 for (size_t k = 1; k <= 40; k += 9) {
7132 GemmMicrokernelTester()
7133 .mr(4)
7134 .nr(16)
7135 .kr(1)
7136 .sr(1)
7137 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007138 .n(n)
Frank Barchard98af05c2021-06-30 12:15:04 -07007139 .k(k)
7140 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007141 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -07007142 }
7143 }
7144 }
7145
7146 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_strided_a) {
7147 TEST_REQUIRES_ARM_NEON;
7148 for (uint32_t n = 17; n < 32; n++) {
7149 for (size_t k = 1; k <= 40; k += 9) {
7150 GemmMicrokernelTester()
7151 .mr(4)
7152 .nr(16)
7153 .kr(1)
7154 .sr(1)
7155 .m(4)
7156 .n(n)
7157 .k(k)
7158 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08007159 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -07007160 }
7161 }
7162 }
7163
7164 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_subtile) {
7165 TEST_REQUIRES_ARM_NEON;
7166 for (uint32_t n = 17; n < 32; n++) {
7167 for (size_t k = 1; k <= 40; k += 9) {
7168 for (uint32_t m = 1; m <= 4; m++) {
7169 GemmMicrokernelTester()
7170 .mr(4)
7171 .nr(16)
7172 .kr(1)
7173 .sr(1)
7174 .m(m)
7175 .n(n)
7176 .k(k)
7177 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007178 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -07007179 }
7180 }
7181 }
7182 }
7183
7184 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16) {
7185 TEST_REQUIRES_ARM_NEON;
7186 for (uint32_t n = 32; n <= 48; n += 16) {
7187 for (size_t k = 1; k <= 40; k += 9) {
7188 GemmMicrokernelTester()
7189 .mr(4)
7190 .nr(16)
7191 .kr(1)
7192 .sr(1)
7193 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007194 .n(n)
Frank Barchard98af05c2021-06-30 12:15:04 -07007195 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007196 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -07007197 }
7198 }
7199 }
7200
7201 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_strided_cn) {
7202 TEST_REQUIRES_ARM_NEON;
7203 for (uint32_t n = 32; n <= 48; n += 16) {
7204 for (size_t k = 1; k <= 40; k += 9) {
7205 GemmMicrokernelTester()
7206 .mr(4)
7207 .nr(16)
7208 .kr(1)
7209 .sr(1)
7210 .m(4)
7211 .n(n)
7212 .k(k)
7213 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007214 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -07007215 }
7216 }
7217 }
7218
7219 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_strided_a) {
7220 TEST_REQUIRES_ARM_NEON;
7221 for (uint32_t n = 32; n <= 48; n += 16) {
7222 for (size_t k = 1; k <= 40; k += 9) {
7223 GemmMicrokernelTester()
7224 .mr(4)
7225 .nr(16)
7226 .kr(1)
7227 .sr(1)
7228 .m(4)
7229 .n(n)
7230 .k(k)
7231 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08007232 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -07007233 }
7234 }
7235 }
7236
7237 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_subtile) {
7238 TEST_REQUIRES_ARM_NEON;
7239 for (uint32_t n = 32; n <= 48; n += 16) {
7240 for (size_t k = 1; k <= 40; k += 9) {
7241 for (uint32_t m = 1; m <= 4; m++) {
7242 GemmMicrokernelTester()
7243 .mr(4)
7244 .nr(16)
7245 .kr(1)
7246 .sr(1)
7247 .m(m)
7248 .n(n)
7249 .k(k)
7250 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007251 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -07007252 }
7253 }
7254 }
7255 }
7256
7257 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cm_subtile) {
7258 TEST_REQUIRES_ARM_NEON;
7259 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007260 for (uint32_t n = 1; n <= 16; n++) {
7261 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard98af05c2021-06-30 12:15:04 -07007262 GemmMicrokernelTester()
7263 .mr(4)
7264 .nr(16)
7265 .kr(1)
7266 .sr(1)
7267 .m(m)
7268 .n(n)
7269 .k(k)
7270 .cm_stride(19)
7271 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007272 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -07007273 }
7274 }
7275 }
7276 }
7277
7278 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, qmin) {
7279 TEST_REQUIRES_ARM_NEON;
7280 GemmMicrokernelTester()
7281 .mr(4)
7282 .nr(16)
7283 .kr(1)
7284 .sr(1)
7285 .m(4)
7286 .n(16)
7287 .k(8)
7288 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007289 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -07007290 }
7291
7292 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, qmax) {
7293 TEST_REQUIRES_ARM_NEON;
7294 GemmMicrokernelTester()
7295 .mr(4)
7296 .nr(16)
7297 .kr(1)
7298 .sr(1)
7299 .m(4)
7300 .n(16)
7301 .k(8)
7302 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007303 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -07007304 }
7305
7306 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cm) {
7307 TEST_REQUIRES_ARM_NEON;
7308 GemmMicrokernelTester()
7309 .mr(4)
7310 .nr(16)
7311 .kr(1)
7312 .sr(1)
7313 .m(4)
7314 .n(16)
7315 .k(8)
7316 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007317 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard98af05c2021-06-30 12:15:04 -07007318 }
Frank Barcharde4d3f762021-12-23 15:31:43 -08007319#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard98af05c2021-06-30 12:15:04 -07007320
7321
Frank Barcharde4d3f762021-12-23 15:31:43 -08007322#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard5cffb642021-11-22 13:59:43 -08007323 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) {
7324 TEST_REQUIRES_ARM_NEON;
7325 GemmMicrokernelTester()
7326 .mr(4)
7327 .nr(16)
7328 .kr(1)
7329 .sr(1)
7330 .m(4)
7331 .n(16)
7332 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08007333 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007334 }
7335
7336 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cn) {
7337 TEST_REQUIRES_ARM_NEON;
7338 GemmMicrokernelTester()
7339 .mr(4)
7340 .nr(16)
7341 .kr(1)
7342 .sr(1)
7343 .m(4)
7344 .n(16)
7345 .k(8)
7346 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007347 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007348 }
7349
7350 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_strided_a) {
7351 TEST_REQUIRES_ARM_NEON;
7352 GemmMicrokernelTester()
7353 .mr(4)
7354 .nr(16)
7355 .kr(1)
7356 .sr(1)
7357 .m(4)
7358 .n(16)
7359 .k(8)
7360 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007361 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007362 }
7363
7364 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) {
7365 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007366 for (uint32_t n = 1; n <= 16; n++) {
7367 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -08007368 GemmMicrokernelTester()
7369 .mr(4)
7370 .nr(16)
7371 .kr(1)
7372 .sr(1)
7373 .m(m)
7374 .n(n)
7375 .k(8)
7376 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007377 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007378 }
7379 }
7380 }
7381
7382 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) {
7383 TEST_REQUIRES_ARM_NEON;
7384 for (uint32_t m = 1; m <= 4; m++) {
7385 GemmMicrokernelTester()
7386 .mr(4)
7387 .nr(16)
7388 .kr(1)
7389 .sr(1)
7390 .m(m)
7391 .n(16)
7392 .k(8)
7393 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007394 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007395 }
7396 }
7397
7398 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) {
7399 TEST_REQUIRES_ARM_NEON;
7400 for (uint32_t n = 1; n <= 16; n++) {
7401 GemmMicrokernelTester()
7402 .mr(4)
7403 .nr(16)
7404 .kr(1)
7405 .sr(1)
7406 .m(4)
7407 .n(n)
7408 .k(8)
7409 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007410 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007411 }
7412 }
7413
7414 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) {
7415 TEST_REQUIRES_ARM_NEON;
7416 for (size_t k = 1; k < 8; k++) {
7417 GemmMicrokernelTester()
7418 .mr(4)
7419 .nr(16)
7420 .kr(1)
7421 .sr(1)
7422 .m(4)
7423 .n(16)
7424 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007425 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007426 }
7427 }
7428
7429 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_strided_a) {
7430 TEST_REQUIRES_ARM_NEON;
7431 for (size_t k = 1; k < 8; k++) {
7432 GemmMicrokernelTester()
7433 .mr(4)
7434 .nr(16)
7435 .kr(1)
7436 .sr(1)
7437 .m(4)
7438 .n(16)
7439 .k(k)
7440 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007441 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007442 }
7443 }
7444
7445 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) {
7446 TEST_REQUIRES_ARM_NEON;
7447 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007448 for (uint32_t n = 1; n <= 16; n++) {
7449 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -08007450 GemmMicrokernelTester()
7451 .mr(4)
7452 .nr(16)
7453 .kr(1)
7454 .sr(1)
7455 .m(m)
7456 .n(n)
7457 .k(k)
7458 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007459 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007460 }
7461 }
7462 }
7463 }
7464
7465 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) {
7466 TEST_REQUIRES_ARM_NEON;
7467 for (size_t k = 9; k < 16; k++) {
7468 GemmMicrokernelTester()
7469 .mr(4)
7470 .nr(16)
7471 .kr(1)
7472 .sr(1)
7473 .m(4)
7474 .n(16)
7475 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007476 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007477 }
7478 }
7479
7480 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_strided_a) {
7481 TEST_REQUIRES_ARM_NEON;
7482 for (size_t k = 9; k < 16; k++) {
7483 GemmMicrokernelTester()
7484 .mr(4)
7485 .nr(16)
7486 .kr(1)
7487 .sr(1)
7488 .m(4)
7489 .n(16)
7490 .k(k)
7491 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007492 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007493 }
7494 }
7495
7496 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
7497 TEST_REQUIRES_ARM_NEON;
7498 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007499 for (uint32_t n = 1; n <= 16; n++) {
7500 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -08007501 GemmMicrokernelTester()
7502 .mr(4)
7503 .nr(16)
7504 .kr(1)
7505 .sr(1)
7506 .m(m)
7507 .n(n)
7508 .k(k)
7509 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007510 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007511 }
7512 }
7513 }
7514 }
7515
7516 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8) {
7517 TEST_REQUIRES_ARM_NEON;
7518 for (size_t k = 16; k <= 80; k += 8) {
7519 GemmMicrokernelTester()
7520 .mr(4)
7521 .nr(16)
7522 .kr(1)
7523 .sr(1)
7524 .m(4)
7525 .n(16)
7526 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007527 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007528 }
7529 }
7530
7531 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8_strided_a) {
7532 TEST_REQUIRES_ARM_NEON;
7533 for (size_t k = 16; k <= 80; k += 8) {
7534 GemmMicrokernelTester()
7535 .mr(4)
7536 .nr(16)
7537 .kr(1)
7538 .sr(1)
7539 .m(4)
7540 .n(16)
7541 .k(k)
7542 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08007543 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007544 }
7545 }
7546
7547 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
7548 TEST_REQUIRES_ARM_NEON;
7549 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007550 for (uint32_t n = 1; n <= 16; n++) {
7551 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -08007552 GemmMicrokernelTester()
7553 .mr(4)
7554 .nr(16)
7555 .kr(1)
7556 .sr(1)
7557 .m(m)
7558 .n(n)
7559 .k(k)
7560 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007561 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007562 }
7563 }
7564 }
7565 }
7566
7567 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16) {
7568 TEST_REQUIRES_ARM_NEON;
7569 for (uint32_t n = 17; n < 32; n++) {
7570 for (size_t k = 1; k <= 40; k += 9) {
7571 GemmMicrokernelTester()
7572 .mr(4)
7573 .nr(16)
7574 .kr(1)
7575 .sr(1)
7576 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007577 .n(n)
Frank Barchard5cffb642021-11-22 13:59:43 -08007578 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007579 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007580 }
7581 }
7582 }
7583
7584 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_strided_cn) {
7585 TEST_REQUIRES_ARM_NEON;
7586 for (uint32_t n = 17; n < 32; n++) {
7587 for (size_t k = 1; k <= 40; k += 9) {
7588 GemmMicrokernelTester()
7589 .mr(4)
7590 .nr(16)
7591 .kr(1)
7592 .sr(1)
7593 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007594 .n(n)
Frank Barchard5cffb642021-11-22 13:59:43 -08007595 .k(k)
7596 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007597 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007598 }
7599 }
7600 }
7601
7602 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_strided_a) {
7603 TEST_REQUIRES_ARM_NEON;
7604 for (uint32_t n = 17; n < 32; n++) {
7605 for (size_t k = 1; k <= 40; k += 9) {
7606 GemmMicrokernelTester()
7607 .mr(4)
7608 .nr(16)
7609 .kr(1)
7610 .sr(1)
7611 .m(4)
7612 .n(n)
7613 .k(k)
7614 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08007615 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007616 }
7617 }
7618 }
7619
7620 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_subtile) {
7621 TEST_REQUIRES_ARM_NEON;
7622 for (uint32_t n = 17; n < 32; n++) {
7623 for (size_t k = 1; k <= 40; k += 9) {
7624 for (uint32_t m = 1; m <= 4; m++) {
7625 GemmMicrokernelTester()
7626 .mr(4)
7627 .nr(16)
7628 .kr(1)
7629 .sr(1)
7630 .m(m)
7631 .n(n)
7632 .k(k)
7633 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007634 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007635 }
7636 }
7637 }
7638 }
7639
7640 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16) {
7641 TEST_REQUIRES_ARM_NEON;
7642 for (uint32_t n = 32; n <= 48; n += 16) {
7643 for (size_t k = 1; k <= 40; k += 9) {
7644 GemmMicrokernelTester()
7645 .mr(4)
7646 .nr(16)
7647 .kr(1)
7648 .sr(1)
7649 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007650 .n(n)
Frank Barchard5cffb642021-11-22 13:59:43 -08007651 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007652 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007653 }
7654 }
7655 }
7656
7657 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_strided_cn) {
7658 TEST_REQUIRES_ARM_NEON;
7659 for (uint32_t n = 32; n <= 48; n += 16) {
7660 for (size_t k = 1; k <= 40; k += 9) {
7661 GemmMicrokernelTester()
7662 .mr(4)
7663 .nr(16)
7664 .kr(1)
7665 .sr(1)
7666 .m(4)
7667 .n(n)
7668 .k(k)
7669 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007670 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007671 }
7672 }
7673 }
7674
7675 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_strided_a) {
7676 TEST_REQUIRES_ARM_NEON;
7677 for (uint32_t n = 32; n <= 48; n += 16) {
7678 for (size_t k = 1; k <= 40; k += 9) {
7679 GemmMicrokernelTester()
7680 .mr(4)
7681 .nr(16)
7682 .kr(1)
7683 .sr(1)
7684 .m(4)
7685 .n(n)
7686 .k(k)
7687 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08007688 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007689 }
7690 }
7691 }
7692
7693 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_subtile) {
7694 TEST_REQUIRES_ARM_NEON;
7695 for (uint32_t n = 32; n <= 48; n += 16) {
7696 for (size_t k = 1; k <= 40; k += 9) {
7697 for (uint32_t m = 1; m <= 4; m++) {
7698 GemmMicrokernelTester()
7699 .mr(4)
7700 .nr(16)
7701 .kr(1)
7702 .sr(1)
7703 .m(m)
7704 .n(n)
7705 .k(k)
7706 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007707 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007708 }
7709 }
7710 }
7711 }
7712
7713 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) {
7714 TEST_REQUIRES_ARM_NEON;
7715 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007716 for (uint32_t n = 1; n <= 16; n++) {
7717 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -08007718 GemmMicrokernelTester()
7719 .mr(4)
7720 .nr(16)
7721 .kr(1)
7722 .sr(1)
7723 .m(m)
7724 .n(n)
7725 .k(k)
7726 .cm_stride(19)
7727 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007728 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007729 }
7730 }
7731 }
7732 }
7733
7734 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmin) {
7735 TEST_REQUIRES_ARM_NEON;
7736 GemmMicrokernelTester()
7737 .mr(4)
7738 .nr(16)
7739 .kr(1)
7740 .sr(1)
7741 .m(4)
7742 .n(16)
7743 .k(8)
7744 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007745 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007746 }
7747
7748 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmax) {
7749 TEST_REQUIRES_ARM_NEON;
7750 GemmMicrokernelTester()
7751 .mr(4)
7752 .nr(16)
7753 .kr(1)
7754 .sr(1)
7755 .m(4)
7756 .n(16)
7757 .k(8)
7758 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007759 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007760 }
7761
7762 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm) {
7763 TEST_REQUIRES_ARM_NEON;
7764 GemmMicrokernelTester()
7765 .mr(4)
7766 .nr(16)
7767 .kr(1)
7768 .sr(1)
7769 .m(4)
7770 .n(16)
7771 .k(8)
7772 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007773 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007774 }
Frank Barcharde4d3f762021-12-23 15:31:43 -08007775#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard5cffb642021-11-22 13:59:43 -08007776
7777
Frank Barcharde4d3f762021-12-23 15:31:43 -08007778#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard5cffb642021-11-22 13:59:43 -08007779 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8) {
7780 TEST_REQUIRES_ARM_NEON;
7781 GemmMicrokernelTester()
7782 .mr(4)
7783 .nr(16)
7784 .kr(1)
7785 .sr(1)
7786 .m(4)
7787 .n(16)
7788 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08007789 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007790 }
7791
7792 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cn) {
7793 TEST_REQUIRES_ARM_NEON;
7794 GemmMicrokernelTester()
7795 .mr(4)
7796 .nr(16)
7797 .kr(1)
7798 .sr(1)
7799 .m(4)
7800 .n(16)
7801 .k(8)
7802 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007803 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007804 }
7805
7806 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_strided_a) {
7807 TEST_REQUIRES_ARM_NEON;
7808 GemmMicrokernelTester()
7809 .mr(4)
7810 .nr(16)
7811 .kr(1)
7812 .sr(1)
7813 .m(4)
7814 .n(16)
7815 .k(8)
7816 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007817 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007818 }
7819
7820 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile) {
7821 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007822 for (uint32_t n = 1; n <= 16; n++) {
7823 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -08007824 GemmMicrokernelTester()
7825 .mr(4)
7826 .nr(16)
7827 .kr(1)
7828 .sr(1)
7829 .m(m)
7830 .n(n)
7831 .k(8)
7832 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007833 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007834 }
7835 }
7836 }
7837
7838 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_m) {
7839 TEST_REQUIRES_ARM_NEON;
7840 for (uint32_t m = 1; m <= 4; m++) {
7841 GemmMicrokernelTester()
7842 .mr(4)
7843 .nr(16)
7844 .kr(1)
7845 .sr(1)
7846 .m(m)
7847 .n(16)
7848 .k(8)
7849 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007850 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007851 }
7852 }
7853
7854 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_n) {
7855 TEST_REQUIRES_ARM_NEON;
7856 for (uint32_t n = 1; n <= 16; n++) {
7857 GemmMicrokernelTester()
7858 .mr(4)
7859 .nr(16)
7860 .kr(1)
7861 .sr(1)
7862 .m(4)
7863 .n(n)
7864 .k(8)
7865 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007866 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007867 }
7868 }
7869
7870 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8) {
7871 TEST_REQUIRES_ARM_NEON;
7872 for (size_t k = 1; k < 8; k++) {
7873 GemmMicrokernelTester()
7874 .mr(4)
7875 .nr(16)
7876 .kr(1)
7877 .sr(1)
7878 .m(4)
7879 .n(16)
7880 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007881 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007882 }
7883 }
7884
7885 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8_strided_a) {
7886 TEST_REQUIRES_ARM_NEON;
7887 for (size_t k = 1; k < 8; k++) {
7888 GemmMicrokernelTester()
7889 .mr(4)
7890 .nr(16)
7891 .kr(1)
7892 .sr(1)
7893 .m(4)
7894 .n(16)
7895 .k(k)
7896 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007897 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007898 }
7899 }
7900
7901 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8_subtile) {
7902 TEST_REQUIRES_ARM_NEON;
7903 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007904 for (uint32_t n = 1; n <= 16; n++) {
7905 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -08007906 GemmMicrokernelTester()
7907 .mr(4)
7908 .nr(16)
7909 .kr(1)
7910 .sr(1)
7911 .m(m)
7912 .n(n)
7913 .k(k)
7914 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007915 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007916 }
7917 }
7918 }
7919 }
7920
7921 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8) {
7922 TEST_REQUIRES_ARM_NEON;
7923 for (size_t k = 9; k < 16; k++) {
7924 GemmMicrokernelTester()
7925 .mr(4)
7926 .nr(16)
7927 .kr(1)
7928 .sr(1)
7929 .m(4)
7930 .n(16)
7931 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007932 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007933 }
7934 }
7935
7936 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8_strided_a) {
7937 TEST_REQUIRES_ARM_NEON;
7938 for (size_t k = 9; k < 16; k++) {
7939 GemmMicrokernelTester()
7940 .mr(4)
7941 .nr(16)
7942 .kr(1)
7943 .sr(1)
7944 .m(4)
7945 .n(16)
7946 .k(k)
7947 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007948 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007949 }
7950 }
7951
7952 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8_subtile) {
7953 TEST_REQUIRES_ARM_NEON;
7954 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007955 for (uint32_t n = 1; n <= 16; n++) {
7956 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -08007957 GemmMicrokernelTester()
7958 .mr(4)
7959 .nr(16)
7960 .kr(1)
7961 .sr(1)
7962 .m(m)
7963 .n(n)
7964 .k(k)
7965 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007966 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007967 }
7968 }
7969 }
7970 }
7971
7972 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_div_8) {
7973 TEST_REQUIRES_ARM_NEON;
7974 for (size_t k = 16; k <= 80; k += 8) {
7975 GemmMicrokernelTester()
7976 .mr(4)
7977 .nr(16)
7978 .kr(1)
7979 .sr(1)
7980 .m(4)
7981 .n(16)
7982 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007983 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08007984 }
7985 }
7986
7987 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_div_8_strided_a) {
7988 TEST_REQUIRES_ARM_NEON;
7989 for (size_t k = 16; k <= 80; k += 8) {
7990 GemmMicrokernelTester()
7991 .mr(4)
7992 .nr(16)
7993 .kr(1)
7994 .sr(1)
7995 .m(4)
7996 .n(16)
7997 .k(k)
7998 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08007999 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08008000 }
8001 }
8002
8003 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_div_8_subtile) {
8004 TEST_REQUIRES_ARM_NEON;
8005 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008006 for (uint32_t n = 1; n <= 16; n++) {
8007 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -08008008 GemmMicrokernelTester()
8009 .mr(4)
8010 .nr(16)
8011 .kr(1)
8012 .sr(1)
8013 .m(m)
8014 .n(n)
8015 .k(k)
8016 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008017 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08008018 }
8019 }
8020 }
8021 }
8022
8023 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16) {
8024 TEST_REQUIRES_ARM_NEON;
8025 for (uint32_t n = 17; n < 32; n++) {
8026 for (size_t k = 1; k <= 40; k += 9) {
8027 GemmMicrokernelTester()
8028 .mr(4)
8029 .nr(16)
8030 .kr(1)
8031 .sr(1)
8032 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008033 .n(n)
Frank Barchard5cffb642021-11-22 13:59:43 -08008034 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008035 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08008036 }
8037 }
8038 }
8039
8040 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_strided_cn) {
8041 TEST_REQUIRES_ARM_NEON;
8042 for (uint32_t n = 17; n < 32; n++) {
8043 for (size_t k = 1; k <= 40; k += 9) {
8044 GemmMicrokernelTester()
8045 .mr(4)
8046 .nr(16)
8047 .kr(1)
8048 .sr(1)
8049 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008050 .n(n)
Frank Barchard5cffb642021-11-22 13:59:43 -08008051 .k(k)
8052 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008053 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08008054 }
8055 }
8056 }
8057
8058 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_strided_a) {
8059 TEST_REQUIRES_ARM_NEON;
8060 for (uint32_t n = 17; n < 32; n++) {
8061 for (size_t k = 1; k <= 40; k += 9) {
8062 GemmMicrokernelTester()
8063 .mr(4)
8064 .nr(16)
8065 .kr(1)
8066 .sr(1)
8067 .m(4)
8068 .n(n)
8069 .k(k)
8070 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08008071 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08008072 }
8073 }
8074 }
8075
8076 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_subtile) {
8077 TEST_REQUIRES_ARM_NEON;
8078 for (uint32_t n = 17; n < 32; n++) {
8079 for (size_t k = 1; k <= 40; k += 9) {
8080 for (uint32_t m = 1; m <= 4; m++) {
8081 GemmMicrokernelTester()
8082 .mr(4)
8083 .nr(16)
8084 .kr(1)
8085 .sr(1)
8086 .m(m)
8087 .n(n)
8088 .k(k)
8089 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008090 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08008091 }
8092 }
8093 }
8094 }
8095
8096 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16) {
8097 TEST_REQUIRES_ARM_NEON;
8098 for (uint32_t n = 32; n <= 48; n += 16) {
8099 for (size_t k = 1; k <= 40; k += 9) {
8100 GemmMicrokernelTester()
8101 .mr(4)
8102 .nr(16)
8103 .kr(1)
8104 .sr(1)
8105 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008106 .n(n)
Frank Barchard5cffb642021-11-22 13:59:43 -08008107 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008108 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08008109 }
8110 }
8111 }
8112
8113 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_strided_cn) {
8114 TEST_REQUIRES_ARM_NEON;
8115 for (uint32_t n = 32; n <= 48; n += 16) {
8116 for (size_t k = 1; k <= 40; k += 9) {
8117 GemmMicrokernelTester()
8118 .mr(4)
8119 .nr(16)
8120 .kr(1)
8121 .sr(1)
8122 .m(4)
8123 .n(n)
8124 .k(k)
8125 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008126 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08008127 }
8128 }
8129 }
8130
8131 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_strided_a) {
8132 TEST_REQUIRES_ARM_NEON;
8133 for (uint32_t n = 32; n <= 48; n += 16) {
8134 for (size_t k = 1; k <= 40; k += 9) {
8135 GemmMicrokernelTester()
8136 .mr(4)
8137 .nr(16)
8138 .kr(1)
8139 .sr(1)
8140 .m(4)
8141 .n(n)
8142 .k(k)
8143 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08008144 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08008145 }
8146 }
8147 }
8148
8149 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_subtile) {
8150 TEST_REQUIRES_ARM_NEON;
8151 for (uint32_t n = 32; n <= 48; n += 16) {
8152 for (size_t k = 1; k <= 40; k += 9) {
8153 for (uint32_t m = 1; m <= 4; m++) {
8154 GemmMicrokernelTester()
8155 .mr(4)
8156 .nr(16)
8157 .kr(1)
8158 .sr(1)
8159 .m(m)
8160 .n(n)
8161 .k(k)
8162 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008163 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08008164 }
8165 }
8166 }
8167 }
8168
8169 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cm_subtile) {
8170 TEST_REQUIRES_ARM_NEON;
8171 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008172 for (uint32_t n = 1; n <= 16; n++) {
8173 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -08008174 GemmMicrokernelTester()
8175 .mr(4)
8176 .nr(16)
8177 .kr(1)
8178 .sr(1)
8179 .m(m)
8180 .n(n)
8181 .k(k)
8182 .cm_stride(19)
8183 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008184 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08008185 }
8186 }
8187 }
8188 }
8189
8190 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, qmin) {
8191 TEST_REQUIRES_ARM_NEON;
8192 GemmMicrokernelTester()
8193 .mr(4)
8194 .nr(16)
8195 .kr(1)
8196 .sr(1)
8197 .m(4)
8198 .n(16)
8199 .k(8)
8200 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008201 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08008202 }
8203
8204 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, qmax) {
8205 TEST_REQUIRES_ARM_NEON;
8206 GemmMicrokernelTester()
8207 .mr(4)
8208 .nr(16)
8209 .kr(1)
8210 .sr(1)
8211 .m(4)
8212 .n(16)
8213 .k(8)
8214 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008215 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08008216 }
8217
8218 TEST(QS8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cm) {
8219 TEST_REQUIRES_ARM_NEON;
8220 GemmMicrokernelTester()
8221 .mr(4)
8222 .nr(16)
8223 .kr(1)
8224 .sr(1)
8225 .m(4)
8226 .n(16)
8227 .k(8)
8228 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008229 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard5cffb642021-11-22 13:59:43 -08008230 }
Frank Barcharde4d3f762021-12-23 15:31:43 -08008231#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard5cffb642021-11-22 13:59:43 -08008232
8233
Frank Barcharde4d3f762021-12-23 15:31:43 -08008234#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard1a0b2762021-06-29 18:37:59 -07008235 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_eq_4) {
8236 TEST_REQUIRES_ARM_NEON_DOT;
8237 GemmMicrokernelTester()
8238 .mr(1)
8239 .nr(16)
8240 .kr(4)
8241 .sr(1)
8242 .m(1)
8243 .n(16)
8244 .k(4)
Marat Dukhan50323b82022-01-11 00:12:01 -08008245 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008246 }
8247
8248 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, strided_cn) {
8249 TEST_REQUIRES_ARM_NEON_DOT;
8250 GemmMicrokernelTester()
8251 .mr(1)
8252 .nr(16)
8253 .kr(4)
8254 .sr(1)
8255 .m(1)
8256 .n(16)
8257 .k(4)
8258 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008259 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008260 }
8261
8262 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_eq_4_strided_a) {
8263 TEST_REQUIRES_ARM_NEON_DOT;
8264 GemmMicrokernelTester()
8265 .mr(1)
8266 .nr(16)
8267 .kr(4)
8268 .sr(1)
8269 .m(1)
8270 .n(16)
8271 .k(4)
8272 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08008273 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008274 }
8275
8276 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile) {
8277 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008278 for (uint32_t n = 1; n <= 16; n++) {
8279 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard1a0b2762021-06-29 18:37:59 -07008280 GemmMicrokernelTester()
8281 .mr(1)
8282 .nr(16)
8283 .kr(4)
8284 .sr(1)
8285 .m(m)
8286 .n(n)
8287 .k(4)
8288 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008289 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008290 }
8291 }
8292 }
8293
8294 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile_m) {
8295 TEST_REQUIRES_ARM_NEON_DOT;
8296 for (uint32_t m = 1; m <= 1; m++) {
8297 GemmMicrokernelTester()
8298 .mr(1)
8299 .nr(16)
8300 .kr(4)
8301 .sr(1)
8302 .m(m)
8303 .n(16)
8304 .k(4)
8305 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008306 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008307 }
8308 }
8309
8310 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile_n) {
8311 TEST_REQUIRES_ARM_NEON_DOT;
8312 for (uint32_t n = 1; n <= 16; n++) {
8313 GemmMicrokernelTester()
8314 .mr(1)
8315 .nr(16)
8316 .kr(4)
8317 .sr(1)
8318 .m(1)
8319 .n(n)
8320 .k(4)
8321 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008322 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008323 }
8324 }
8325
8326 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_lt_4) {
8327 TEST_REQUIRES_ARM_NEON_DOT;
8328 for (size_t k = 1; k < 4; k++) {
8329 GemmMicrokernelTester()
8330 .mr(1)
8331 .nr(16)
8332 .kr(4)
8333 .sr(1)
8334 .m(1)
8335 .n(16)
8336 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008337 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008338 }
8339 }
8340
8341 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_lt_4_strided_a) {
8342 TEST_REQUIRES_ARM_NEON_DOT;
8343 for (size_t k = 1; k < 4; k++) {
8344 GemmMicrokernelTester()
8345 .mr(1)
8346 .nr(16)
8347 .kr(4)
8348 .sr(1)
8349 .m(1)
8350 .n(16)
8351 .k(k)
8352 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08008353 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008354 }
8355 }
8356
8357 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_lt_4_subtile) {
8358 TEST_REQUIRES_ARM_NEON_DOT;
8359 for (size_t k = 1; k < 4; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008360 for (uint32_t n = 1; n <= 16; n++) {
8361 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard1a0b2762021-06-29 18:37:59 -07008362 GemmMicrokernelTester()
8363 .mr(1)
8364 .nr(16)
8365 .kr(4)
8366 .sr(1)
8367 .m(m)
8368 .n(n)
8369 .k(k)
8370 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008371 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008372 }
8373 }
8374 }
8375 }
8376
8377 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_gt_4) {
8378 TEST_REQUIRES_ARM_NEON_DOT;
8379 for (size_t k = 5; k < 8; k++) {
8380 GemmMicrokernelTester()
8381 .mr(1)
8382 .nr(16)
8383 .kr(4)
8384 .sr(1)
8385 .m(1)
8386 .n(16)
8387 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008388 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008389 }
8390 }
8391
8392 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_gt_4_strided_a) {
8393 TEST_REQUIRES_ARM_NEON_DOT;
8394 for (size_t k = 5; k < 8; k++) {
8395 GemmMicrokernelTester()
8396 .mr(1)
8397 .nr(16)
8398 .kr(4)
8399 .sr(1)
8400 .m(1)
8401 .n(16)
8402 .k(k)
8403 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008404 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008405 }
8406 }
8407
8408 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_gt_4_subtile) {
8409 TEST_REQUIRES_ARM_NEON_DOT;
8410 for (size_t k = 5; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008411 for (uint32_t n = 1; n <= 16; n++) {
8412 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard1a0b2762021-06-29 18:37:59 -07008413 GemmMicrokernelTester()
8414 .mr(1)
8415 .nr(16)
8416 .kr(4)
8417 .sr(1)
8418 .m(m)
8419 .n(n)
8420 .k(k)
8421 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008422 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008423 }
8424 }
8425 }
8426 }
8427
8428 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_div_4) {
8429 TEST_REQUIRES_ARM_NEON_DOT;
8430 for (size_t k = 8; k <= 40; k += 4) {
8431 GemmMicrokernelTester()
8432 .mr(1)
8433 .nr(16)
8434 .kr(4)
8435 .sr(1)
8436 .m(1)
8437 .n(16)
8438 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008439 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008440 }
8441 }
8442
8443 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_div_4_strided_a) {
8444 TEST_REQUIRES_ARM_NEON_DOT;
8445 for (size_t k = 8; k <= 40; k += 4) {
8446 GemmMicrokernelTester()
8447 .mr(1)
8448 .nr(16)
8449 .kr(4)
8450 .sr(1)
8451 .m(1)
8452 .n(16)
8453 .k(k)
8454 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08008455 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008456 }
8457 }
8458
8459 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, k_div_4_subtile) {
8460 TEST_REQUIRES_ARM_NEON_DOT;
8461 for (size_t k = 8; k <= 40; k += 4) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008462 for (uint32_t n = 1; n <= 16; n++) {
8463 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard1a0b2762021-06-29 18:37:59 -07008464 GemmMicrokernelTester()
8465 .mr(1)
8466 .nr(16)
8467 .kr(4)
8468 .sr(1)
8469 .m(m)
8470 .n(n)
8471 .k(k)
8472 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008473 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008474 }
8475 }
8476 }
8477 }
8478
8479 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, n_gt_16) {
8480 TEST_REQUIRES_ARM_NEON_DOT;
8481 for (uint32_t n = 17; n < 32; n++) {
8482 for (size_t k = 1; k <= 20; k += 5) {
8483 GemmMicrokernelTester()
8484 .mr(1)
8485 .nr(16)
8486 .kr(4)
8487 .sr(1)
8488 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008489 .n(n)
Frank Barchard1a0b2762021-06-29 18:37:59 -07008490 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008491 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008492 }
8493 }
8494 }
8495
8496 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, n_gt_16_strided_cn) {
8497 TEST_REQUIRES_ARM_NEON_DOT;
8498 for (uint32_t n = 17; n < 32; n++) {
8499 for (size_t k = 1; k <= 20; k += 5) {
8500 GemmMicrokernelTester()
8501 .mr(1)
8502 .nr(16)
8503 .kr(4)
8504 .sr(1)
8505 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008506 .n(n)
Frank Barchard1a0b2762021-06-29 18:37:59 -07008507 .k(k)
8508 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008509 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008510 }
8511 }
8512 }
8513
8514 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, n_gt_16_strided_a) {
8515 TEST_REQUIRES_ARM_NEON_DOT;
8516 for (uint32_t n = 17; n < 32; n++) {
8517 for (size_t k = 1; k <= 20; k += 5) {
8518 GemmMicrokernelTester()
8519 .mr(1)
8520 .nr(16)
8521 .kr(4)
8522 .sr(1)
8523 .m(1)
8524 .n(n)
8525 .k(k)
8526 .a_stride(23)
Marat Dukhan50323b82022-01-11 00:12:01 -08008527 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008528 }
8529 }
8530 }
8531
8532 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, n_gt_16_subtile) {
8533 TEST_REQUIRES_ARM_NEON_DOT;
8534 for (uint32_t n = 17; n < 32; n++) {
8535 for (size_t k = 1; k <= 20; k += 5) {
8536 for (uint32_t m = 1; m <= 1; m++) {
8537 GemmMicrokernelTester()
8538 .mr(1)
8539 .nr(16)
8540 .kr(4)
8541 .sr(1)
8542 .m(m)
8543 .n(n)
8544 .k(k)
8545 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008546 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008547 }
8548 }
8549 }
8550 }
8551
8552 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, n_div_16) {
8553 TEST_REQUIRES_ARM_NEON_DOT;
8554 for (uint32_t n = 32; n <= 48; n += 16) {
8555 for (size_t k = 1; k <= 20; k += 5) {
8556 GemmMicrokernelTester()
8557 .mr(1)
8558 .nr(16)
8559 .kr(4)
8560 .sr(1)
8561 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008562 .n(n)
Frank Barchard1a0b2762021-06-29 18:37:59 -07008563 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008564 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008565 }
8566 }
8567 }
8568
8569 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, n_div_16_strided_cn) {
8570 TEST_REQUIRES_ARM_NEON_DOT;
8571 for (uint32_t n = 32; n <= 48; n += 16) {
8572 for (size_t k = 1; k <= 20; k += 5) {
8573 GemmMicrokernelTester()
8574 .mr(1)
8575 .nr(16)
8576 .kr(4)
8577 .sr(1)
8578 .m(1)
8579 .n(n)
8580 .k(k)
8581 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008582 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008583 }
8584 }
8585 }
8586
8587 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, n_div_16_strided_a) {
8588 TEST_REQUIRES_ARM_NEON_DOT;
8589 for (uint32_t n = 32; n <= 48; n += 16) {
8590 for (size_t k = 1; k <= 20; k += 5) {
8591 GemmMicrokernelTester()
8592 .mr(1)
8593 .nr(16)
8594 .kr(4)
8595 .sr(1)
8596 .m(1)
8597 .n(n)
8598 .k(k)
8599 .a_stride(23)
Marat Dukhan50323b82022-01-11 00:12:01 -08008600 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008601 }
8602 }
8603 }
8604
8605 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, n_div_16_subtile) {
8606 TEST_REQUIRES_ARM_NEON_DOT;
8607 for (uint32_t n = 32; n <= 48; n += 16) {
8608 for (size_t k = 1; k <= 20; k += 5) {
8609 for (uint32_t m = 1; m <= 1; m++) {
8610 GemmMicrokernelTester()
8611 .mr(1)
8612 .nr(16)
8613 .kr(4)
8614 .sr(1)
8615 .m(m)
8616 .n(n)
8617 .k(k)
8618 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008619 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008620 }
8621 }
8622 }
8623 }
8624
8625 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, strided_cm_subtile) {
8626 TEST_REQUIRES_ARM_NEON_DOT;
8627 for (size_t k = 1; k <= 20; k += 5) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008628 for (uint32_t n = 1; n <= 16; n++) {
8629 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard1a0b2762021-06-29 18:37:59 -07008630 GemmMicrokernelTester()
8631 .mr(1)
8632 .nr(16)
8633 .kr(4)
8634 .sr(1)
8635 .m(m)
8636 .n(n)
8637 .k(k)
8638 .cm_stride(19)
8639 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008640 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008641 }
8642 }
8643 }
8644 }
8645
8646 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, qmin) {
8647 TEST_REQUIRES_ARM_NEON_DOT;
8648 GemmMicrokernelTester()
8649 .mr(1)
8650 .nr(16)
8651 .kr(4)
8652 .sr(1)
8653 .m(1)
8654 .n(16)
8655 .k(4)
8656 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008657 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008658 }
8659
8660 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, qmax) {
8661 TEST_REQUIRES_ARM_NEON_DOT;
8662 GemmMicrokernelTester()
8663 .mr(1)
8664 .nr(16)
8665 .kr(4)
8666 .sr(1)
8667 .m(1)
8668 .n(16)
8669 .k(4)
8670 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008671 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008672 }
8673
8674 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD32, strided_cm) {
8675 TEST_REQUIRES_ARM_NEON_DOT;
8676 GemmMicrokernelTester()
8677 .mr(1)
8678 .nr(16)
8679 .kr(4)
8680 .sr(1)
8681 .m(1)
8682 .n(16)
8683 .k(4)
8684 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008685 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008686 }
Frank Barcharde4d3f762021-12-23 15:31:43 -08008687#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard1a0b2762021-06-29 18:37:59 -07008688
8689
Frank Barcharde4d3f762021-12-23 15:31:43 -08008690#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard1a0b2762021-06-29 18:37:59 -07008691 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_eq_8) {
8692 TEST_REQUIRES_ARM_NEON_DOT;
8693 GemmMicrokernelTester()
8694 .mr(1)
8695 .nr(16)
8696 .kr(4)
8697 .sr(1)
8698 .m(1)
8699 .n(16)
8700 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08008701 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008702 }
8703
8704 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, strided_cn) {
8705 TEST_REQUIRES_ARM_NEON_DOT;
8706 GemmMicrokernelTester()
8707 .mr(1)
8708 .nr(16)
8709 .kr(4)
8710 .sr(1)
8711 .m(1)
8712 .n(16)
8713 .k(8)
8714 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008715 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008716 }
8717
8718 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_eq_8_strided_a) {
8719 TEST_REQUIRES_ARM_NEON_DOT;
8720 GemmMicrokernelTester()
8721 .mr(1)
8722 .nr(16)
8723 .kr(4)
8724 .sr(1)
8725 .m(1)
8726 .n(16)
8727 .k(8)
8728 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008729 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008730 }
8731
8732 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile) {
8733 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008734 for (uint32_t n = 1; n <= 16; n++) {
8735 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard1a0b2762021-06-29 18:37:59 -07008736 GemmMicrokernelTester()
8737 .mr(1)
8738 .nr(16)
8739 .kr(4)
8740 .sr(1)
8741 .m(m)
8742 .n(n)
8743 .k(8)
8744 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008745 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008746 }
8747 }
8748 }
8749
8750 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_m) {
8751 TEST_REQUIRES_ARM_NEON_DOT;
8752 for (uint32_t m = 1; m <= 1; m++) {
8753 GemmMicrokernelTester()
8754 .mr(1)
8755 .nr(16)
8756 .kr(4)
8757 .sr(1)
8758 .m(m)
8759 .n(16)
8760 .k(8)
8761 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008762 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008763 }
8764 }
8765
8766 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_n) {
8767 TEST_REQUIRES_ARM_NEON_DOT;
8768 for (uint32_t n = 1; n <= 16; n++) {
8769 GemmMicrokernelTester()
8770 .mr(1)
8771 .nr(16)
8772 .kr(4)
8773 .sr(1)
8774 .m(1)
8775 .n(n)
8776 .k(8)
8777 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008778 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008779 }
8780 }
8781
8782 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_lt_8) {
8783 TEST_REQUIRES_ARM_NEON_DOT;
8784 for (size_t k = 1; k < 8; k++) {
8785 GemmMicrokernelTester()
8786 .mr(1)
8787 .nr(16)
8788 .kr(4)
8789 .sr(1)
8790 .m(1)
8791 .n(16)
8792 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008793 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008794 }
8795 }
8796
8797 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_lt_8_strided_a) {
8798 TEST_REQUIRES_ARM_NEON_DOT;
8799 for (size_t k = 1; k < 8; k++) {
8800 GemmMicrokernelTester()
8801 .mr(1)
8802 .nr(16)
8803 .kr(4)
8804 .sr(1)
8805 .m(1)
8806 .n(16)
8807 .k(k)
8808 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008809 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008810 }
8811 }
8812
8813 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_lt_8_subtile) {
8814 TEST_REQUIRES_ARM_NEON_DOT;
8815 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008816 for (uint32_t n = 1; n <= 16; n++) {
8817 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard1a0b2762021-06-29 18:37:59 -07008818 GemmMicrokernelTester()
8819 .mr(1)
8820 .nr(16)
8821 .kr(4)
8822 .sr(1)
8823 .m(m)
8824 .n(n)
8825 .k(k)
8826 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008827 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008828 }
8829 }
8830 }
8831 }
8832
8833 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_gt_8) {
8834 TEST_REQUIRES_ARM_NEON_DOT;
8835 for (size_t k = 9; k < 16; k++) {
8836 GemmMicrokernelTester()
8837 .mr(1)
8838 .nr(16)
8839 .kr(4)
8840 .sr(1)
8841 .m(1)
8842 .n(16)
8843 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008844 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008845 }
8846 }
8847
8848 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_gt_8_strided_a) {
8849 TEST_REQUIRES_ARM_NEON_DOT;
8850 for (size_t k = 9; k < 16; k++) {
8851 GemmMicrokernelTester()
8852 .mr(1)
8853 .nr(16)
8854 .kr(4)
8855 .sr(1)
8856 .m(1)
8857 .n(16)
8858 .k(k)
8859 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008860 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008861 }
8862 }
8863
8864 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_gt_8_subtile) {
8865 TEST_REQUIRES_ARM_NEON_DOT;
8866 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008867 for (uint32_t n = 1; n <= 16; n++) {
8868 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard1a0b2762021-06-29 18:37:59 -07008869 GemmMicrokernelTester()
8870 .mr(1)
8871 .nr(16)
8872 .kr(4)
8873 .sr(1)
8874 .m(m)
8875 .n(n)
8876 .k(k)
8877 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008878 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008879 }
8880 }
8881 }
8882 }
8883
8884 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_div_8) {
8885 TEST_REQUIRES_ARM_NEON_DOT;
8886 for (size_t k = 16; k <= 80; k += 8) {
8887 GemmMicrokernelTester()
8888 .mr(1)
8889 .nr(16)
8890 .kr(4)
8891 .sr(1)
8892 .m(1)
8893 .n(16)
8894 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008895 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008896 }
8897 }
8898
8899 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_div_8_strided_a) {
8900 TEST_REQUIRES_ARM_NEON_DOT;
8901 for (size_t k = 16; k <= 80; k += 8) {
8902 GemmMicrokernelTester()
8903 .mr(1)
8904 .nr(16)
8905 .kr(4)
8906 .sr(1)
8907 .m(1)
8908 .n(16)
8909 .k(k)
8910 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08008911 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008912 }
8913 }
8914
8915 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, k_div_8_subtile) {
8916 TEST_REQUIRES_ARM_NEON_DOT;
8917 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008918 for (uint32_t n = 1; n <= 16; n++) {
8919 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard1a0b2762021-06-29 18:37:59 -07008920 GemmMicrokernelTester()
8921 .mr(1)
8922 .nr(16)
8923 .kr(4)
8924 .sr(1)
8925 .m(m)
8926 .n(n)
8927 .k(k)
8928 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008929 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008930 }
8931 }
8932 }
8933 }
8934
8935 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, n_gt_16) {
8936 TEST_REQUIRES_ARM_NEON_DOT;
8937 for (uint32_t n = 17; n < 32; n++) {
8938 for (size_t k = 1; k <= 40; k += 9) {
8939 GemmMicrokernelTester()
8940 .mr(1)
8941 .nr(16)
8942 .kr(4)
8943 .sr(1)
8944 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008945 .n(n)
Frank Barchard1a0b2762021-06-29 18:37:59 -07008946 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008947 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008948 }
8949 }
8950 }
8951
8952 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, n_gt_16_strided_cn) {
8953 TEST_REQUIRES_ARM_NEON_DOT;
8954 for (uint32_t n = 17; n < 32; n++) {
8955 for (size_t k = 1; k <= 40; k += 9) {
8956 GemmMicrokernelTester()
8957 .mr(1)
8958 .nr(16)
8959 .kr(4)
8960 .sr(1)
8961 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008962 .n(n)
Frank Barchard1a0b2762021-06-29 18:37:59 -07008963 .k(k)
8964 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008965 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008966 }
8967 }
8968 }
8969
8970 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, n_gt_16_strided_a) {
8971 TEST_REQUIRES_ARM_NEON_DOT;
8972 for (uint32_t n = 17; n < 32; n++) {
8973 for (size_t k = 1; k <= 40; k += 9) {
8974 GemmMicrokernelTester()
8975 .mr(1)
8976 .nr(16)
8977 .kr(4)
8978 .sr(1)
8979 .m(1)
8980 .n(n)
8981 .k(k)
8982 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08008983 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07008984 }
8985 }
8986 }
8987
8988 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, n_gt_16_subtile) {
8989 TEST_REQUIRES_ARM_NEON_DOT;
8990 for (uint32_t n = 17; n < 32; n++) {
8991 for (size_t k = 1; k <= 40; k += 9) {
8992 for (uint32_t m = 1; m <= 1; m++) {
8993 GemmMicrokernelTester()
8994 .mr(1)
8995 .nr(16)
8996 .kr(4)
8997 .sr(1)
8998 .m(m)
8999 .n(n)
9000 .k(k)
9001 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009002 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07009003 }
9004 }
9005 }
9006 }
9007
9008 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, n_div_16) {
9009 TEST_REQUIRES_ARM_NEON_DOT;
9010 for (uint32_t n = 32; n <= 48; n += 16) {
9011 for (size_t k = 1; k <= 40; k += 9) {
9012 GemmMicrokernelTester()
9013 .mr(1)
9014 .nr(16)
9015 .kr(4)
9016 .sr(1)
9017 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009018 .n(n)
Frank Barchard1a0b2762021-06-29 18:37:59 -07009019 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009020 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07009021 }
9022 }
9023 }
9024
9025 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, n_div_16_strided_cn) {
9026 TEST_REQUIRES_ARM_NEON_DOT;
9027 for (uint32_t n = 32; n <= 48; n += 16) {
9028 for (size_t k = 1; k <= 40; k += 9) {
9029 GemmMicrokernelTester()
9030 .mr(1)
9031 .nr(16)
9032 .kr(4)
9033 .sr(1)
9034 .m(1)
9035 .n(n)
9036 .k(k)
9037 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009038 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07009039 }
9040 }
9041 }
9042
9043 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, n_div_16_strided_a) {
9044 TEST_REQUIRES_ARM_NEON_DOT;
9045 for (uint32_t n = 32; n <= 48; n += 16) {
9046 for (size_t k = 1; k <= 40; k += 9) {
9047 GemmMicrokernelTester()
9048 .mr(1)
9049 .nr(16)
9050 .kr(4)
9051 .sr(1)
9052 .m(1)
9053 .n(n)
9054 .k(k)
9055 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08009056 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07009057 }
9058 }
9059 }
9060
9061 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, n_div_16_subtile) {
9062 TEST_REQUIRES_ARM_NEON_DOT;
9063 for (uint32_t n = 32; n <= 48; n += 16) {
9064 for (size_t k = 1; k <= 40; k += 9) {
9065 for (uint32_t m = 1; m <= 1; m++) {
9066 GemmMicrokernelTester()
9067 .mr(1)
9068 .nr(16)
9069 .kr(4)
9070 .sr(1)
9071 .m(m)
9072 .n(n)
9073 .k(k)
9074 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009075 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07009076 }
9077 }
9078 }
9079 }
9080
9081 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, strided_cm_subtile) {
9082 TEST_REQUIRES_ARM_NEON_DOT;
9083 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009084 for (uint32_t n = 1; n <= 16; n++) {
9085 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard1a0b2762021-06-29 18:37:59 -07009086 GemmMicrokernelTester()
9087 .mr(1)
9088 .nr(16)
9089 .kr(4)
9090 .sr(1)
9091 .m(m)
9092 .n(n)
9093 .k(k)
9094 .cm_stride(19)
9095 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009096 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07009097 }
9098 }
9099 }
9100 }
9101
9102 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, qmin) {
9103 TEST_REQUIRES_ARM_NEON_DOT;
9104 GemmMicrokernelTester()
9105 .mr(1)
9106 .nr(16)
9107 .kr(4)
9108 .sr(1)
9109 .m(1)
9110 .n(16)
9111 .k(8)
9112 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009113 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07009114 }
9115
9116 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, qmax) {
9117 TEST_REQUIRES_ARM_NEON_DOT;
9118 GemmMicrokernelTester()
9119 .mr(1)
9120 .nr(16)
9121 .kr(4)
9122 .sr(1)
9123 .m(1)
9124 .n(16)
9125 .k(8)
9126 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009127 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07009128 }
9129
9130 TEST(QS8_GEMM_MINMAX_FP32_1X16C4__AARCH64_NEONDOT_LD64, strided_cm) {
9131 TEST_REQUIRES_ARM_NEON_DOT;
9132 GemmMicrokernelTester()
9133 .mr(1)
9134 .nr(16)
9135 .kr(4)
9136 .sr(1)
9137 .m(1)
9138 .n(16)
9139 .k(8)
9140 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009141 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Frank Barchard1a0b2762021-06-29 18:37:59 -07009142 }
Frank Barcharde4d3f762021-12-23 15:31:43 -08009143#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard1a0b2762021-06-29 18:37:59 -07009144
9145
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009146#if XNN_ARCH_ARM || XNN_ARCH_ARM64
9147 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8) {
9148 TEST_REQUIRES_ARM_NEON;
9149 GemmMicrokernelTester()
9150 .mr(1)
9151 .nr(16)
9152 .kr(1)
9153 .sr(1)
9154 .m(1)
9155 .n(16)
9156 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08009157 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009158 }
9159
9160 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, strided_cn) {
9161 TEST_REQUIRES_ARM_NEON;
9162 GemmMicrokernelTester()
9163 .mr(1)
9164 .nr(16)
9165 .kr(1)
9166 .sr(1)
9167 .m(1)
9168 .n(16)
9169 .k(8)
9170 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009171 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009172 }
9173
9174 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8_strided_a) {
9175 TEST_REQUIRES_ARM_NEON;
9176 GemmMicrokernelTester()
9177 .mr(1)
9178 .nr(16)
9179 .kr(1)
9180 .sr(1)
9181 .m(1)
9182 .n(16)
9183 .k(8)
9184 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009185 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009186 }
9187
9188 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8_subtile) {
9189 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009190 for (uint32_t n = 1; n <= 16; n++) {
9191 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009192 GemmMicrokernelTester()
9193 .mr(1)
9194 .nr(16)
9195 .kr(1)
9196 .sr(1)
9197 .m(m)
9198 .n(n)
9199 .k(8)
9200 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009201 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009202 }
9203 }
9204 }
9205
9206 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
9207 TEST_REQUIRES_ARM_NEON;
9208 for (uint32_t m = 1; m <= 1; m++) {
9209 GemmMicrokernelTester()
9210 .mr(1)
9211 .nr(16)
9212 .kr(1)
9213 .sr(1)
9214 .m(m)
9215 .n(16)
9216 .k(8)
9217 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009218 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009219 }
9220 }
9221
9222 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
9223 TEST_REQUIRES_ARM_NEON;
9224 for (uint32_t n = 1; n <= 16; n++) {
9225 GemmMicrokernelTester()
9226 .mr(1)
9227 .nr(16)
9228 .kr(1)
9229 .sr(1)
9230 .m(1)
9231 .n(n)
9232 .k(8)
9233 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009234 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009235 }
9236 }
9237
9238 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_lt_8) {
9239 TEST_REQUIRES_ARM_NEON;
9240 for (size_t k = 1; k < 8; k++) {
9241 GemmMicrokernelTester()
9242 .mr(1)
9243 .nr(16)
9244 .kr(1)
9245 .sr(1)
9246 .m(1)
9247 .n(16)
9248 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009249 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009250 }
9251 }
9252
9253 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_lt_8_strided_a) {
9254 TEST_REQUIRES_ARM_NEON;
9255 for (size_t k = 1; k < 8; k++) {
9256 GemmMicrokernelTester()
9257 .mr(1)
9258 .nr(16)
9259 .kr(1)
9260 .sr(1)
9261 .m(1)
9262 .n(16)
9263 .k(k)
9264 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009265 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009266 }
9267 }
9268
9269 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_lt_8_subtile) {
9270 TEST_REQUIRES_ARM_NEON;
9271 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009272 for (uint32_t n = 1; n <= 16; n++) {
9273 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009274 GemmMicrokernelTester()
9275 .mr(1)
9276 .nr(16)
9277 .kr(1)
9278 .sr(1)
9279 .m(m)
9280 .n(n)
9281 .k(k)
9282 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009283 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009284 }
9285 }
9286 }
9287 }
9288
9289 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_gt_8) {
9290 TEST_REQUIRES_ARM_NEON;
9291 for (size_t k = 9; k < 16; k++) {
9292 GemmMicrokernelTester()
9293 .mr(1)
9294 .nr(16)
9295 .kr(1)
9296 .sr(1)
9297 .m(1)
9298 .n(16)
9299 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009300 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009301 }
9302 }
9303
9304 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_gt_8_strided_a) {
9305 TEST_REQUIRES_ARM_NEON;
9306 for (size_t k = 9; k < 16; k++) {
9307 GemmMicrokernelTester()
9308 .mr(1)
9309 .nr(16)
9310 .kr(1)
9311 .sr(1)
9312 .m(1)
9313 .n(16)
9314 .k(k)
9315 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009316 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009317 }
9318 }
9319
9320 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_gt_8_subtile) {
9321 TEST_REQUIRES_ARM_NEON;
9322 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009323 for (uint32_t n = 1; n <= 16; n++) {
9324 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009325 GemmMicrokernelTester()
9326 .mr(1)
9327 .nr(16)
9328 .kr(1)
9329 .sr(1)
9330 .m(m)
9331 .n(n)
9332 .k(k)
9333 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009334 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009335 }
9336 }
9337 }
9338 }
9339
9340 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_div_8) {
9341 TEST_REQUIRES_ARM_NEON;
9342 for (size_t k = 16; k <= 80; k += 8) {
9343 GemmMicrokernelTester()
9344 .mr(1)
9345 .nr(16)
9346 .kr(1)
9347 .sr(1)
9348 .m(1)
9349 .n(16)
9350 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009351 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009352 }
9353 }
9354
9355 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_div_8_strided_a) {
9356 TEST_REQUIRES_ARM_NEON;
9357 for (size_t k = 16; k <= 80; k += 8) {
9358 GemmMicrokernelTester()
9359 .mr(1)
9360 .nr(16)
9361 .kr(1)
9362 .sr(1)
9363 .m(1)
9364 .n(16)
9365 .k(k)
9366 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08009367 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009368 }
9369 }
9370
9371 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_div_8_subtile) {
9372 TEST_REQUIRES_ARM_NEON;
9373 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009374 for (uint32_t n = 1; n <= 16; n++) {
9375 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009376 GemmMicrokernelTester()
9377 .mr(1)
9378 .nr(16)
9379 .kr(1)
9380 .sr(1)
9381 .m(m)
9382 .n(n)
9383 .k(k)
9384 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009385 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009386 }
9387 }
9388 }
9389 }
9390
9391 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16) {
9392 TEST_REQUIRES_ARM_NEON;
9393 for (uint32_t n = 17; n < 32; n++) {
9394 for (size_t k = 1; k <= 40; k += 9) {
9395 GemmMicrokernelTester()
9396 .mr(1)
9397 .nr(16)
9398 .kr(1)
9399 .sr(1)
9400 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009401 .n(n)
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009402 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009403 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009404 }
9405 }
9406 }
9407
9408 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
9409 TEST_REQUIRES_ARM_NEON;
9410 for (uint32_t n = 17; n < 32; n++) {
9411 for (size_t k = 1; k <= 40; k += 9) {
9412 GemmMicrokernelTester()
9413 .mr(1)
9414 .nr(16)
9415 .kr(1)
9416 .sr(1)
9417 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009418 .n(n)
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009419 .k(k)
9420 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009421 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009422 }
9423 }
9424 }
9425
9426 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16_strided_a) {
9427 TEST_REQUIRES_ARM_NEON;
9428 for (uint32_t n = 17; n < 32; n++) {
9429 for (size_t k = 1; k <= 40; k += 9) {
9430 GemmMicrokernelTester()
9431 .mr(1)
9432 .nr(16)
9433 .kr(1)
9434 .sr(1)
9435 .m(1)
9436 .n(n)
9437 .k(k)
9438 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08009439 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009440 }
9441 }
9442 }
9443
9444 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16_subtile) {
9445 TEST_REQUIRES_ARM_NEON;
9446 for (uint32_t n = 17; n < 32; n++) {
9447 for (size_t k = 1; k <= 40; k += 9) {
9448 for (uint32_t m = 1; m <= 1; m++) {
9449 GemmMicrokernelTester()
9450 .mr(1)
9451 .nr(16)
9452 .kr(1)
9453 .sr(1)
9454 .m(m)
9455 .n(n)
9456 .k(k)
9457 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009458 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009459 }
9460 }
9461 }
9462 }
9463
9464 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16) {
9465 TEST_REQUIRES_ARM_NEON;
9466 for (uint32_t n = 32; n <= 48; n += 16) {
9467 for (size_t k = 1; k <= 40; k += 9) {
9468 GemmMicrokernelTester()
9469 .mr(1)
9470 .nr(16)
9471 .kr(1)
9472 .sr(1)
9473 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009474 .n(n)
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009475 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009476 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009477 }
9478 }
9479 }
9480
9481 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
9482 TEST_REQUIRES_ARM_NEON;
9483 for (uint32_t n = 32; n <= 48; n += 16) {
9484 for (size_t k = 1; k <= 40; k += 9) {
9485 GemmMicrokernelTester()
9486 .mr(1)
9487 .nr(16)
9488 .kr(1)
9489 .sr(1)
9490 .m(1)
9491 .n(n)
9492 .k(k)
9493 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009494 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009495 }
9496 }
9497 }
9498
9499 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16_strided_a) {
9500 TEST_REQUIRES_ARM_NEON;
9501 for (uint32_t n = 32; n <= 48; n += 16) {
9502 for (size_t k = 1; k <= 40; k += 9) {
9503 GemmMicrokernelTester()
9504 .mr(1)
9505 .nr(16)
9506 .kr(1)
9507 .sr(1)
9508 .m(1)
9509 .n(n)
9510 .k(k)
9511 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08009512 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009513 }
9514 }
9515 }
9516
9517 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16_subtile) {
9518 TEST_REQUIRES_ARM_NEON;
9519 for (uint32_t n = 32; n <= 48; n += 16) {
9520 for (size_t k = 1; k <= 40; k += 9) {
9521 for (uint32_t m = 1; m <= 1; m++) {
9522 GemmMicrokernelTester()
9523 .mr(1)
9524 .nr(16)
9525 .kr(1)
9526 .sr(1)
9527 .m(m)
9528 .n(n)
9529 .k(k)
9530 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009531 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009532 }
9533 }
9534 }
9535 }
9536
9537 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, strided_cm_subtile) {
9538 TEST_REQUIRES_ARM_NEON;
9539 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009540 for (uint32_t n = 1; n <= 16; n++) {
9541 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009542 GemmMicrokernelTester()
9543 .mr(1)
9544 .nr(16)
9545 .kr(1)
9546 .sr(1)
9547 .m(m)
9548 .n(n)
9549 .k(k)
9550 .cm_stride(19)
9551 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009552 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009553 }
9554 }
9555 }
9556 }
9557
9558 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, qmin) {
9559 TEST_REQUIRES_ARM_NEON;
9560 GemmMicrokernelTester()
9561 .mr(1)
9562 .nr(16)
9563 .kr(1)
9564 .sr(1)
9565 .m(1)
9566 .n(16)
9567 .k(8)
9568 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009569 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009570 }
9571
9572 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, qmax) {
9573 TEST_REQUIRES_ARM_NEON;
9574 GemmMicrokernelTester()
9575 .mr(1)
9576 .nr(16)
9577 .kr(1)
9578 .sr(1)
9579 .m(1)
9580 .n(16)
9581 .k(8)
9582 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009583 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009584 }
9585
9586 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, strided_cm) {
9587 TEST_REQUIRES_ARM_NEON;
9588 GemmMicrokernelTester()
9589 .mr(1)
9590 .nr(16)
9591 .kr(1)
9592 .sr(1)
9593 .m(1)
9594 .n(16)
9595 .k(8)
9596 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009597 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009598 }
9599#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9600
9601
9602#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009603 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8) {
9604 TEST_REQUIRES_ARM_NEON_V8;
9605 GemmMicrokernelTester()
9606 .mr(1)
9607 .nr(16)
9608 .kr(1)
9609 .sr(1)
9610 .m(1)
9611 .n(16)
9612 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08009613 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009614 }
9615
9616 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, strided_cn) {
9617 TEST_REQUIRES_ARM_NEON_V8;
9618 GemmMicrokernelTester()
9619 .mr(1)
9620 .nr(16)
9621 .kr(1)
9622 .sr(1)
9623 .m(1)
9624 .n(16)
9625 .k(8)
9626 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009627 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009628 }
9629
9630 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8_strided_a) {
9631 TEST_REQUIRES_ARM_NEON_V8;
9632 GemmMicrokernelTester()
9633 .mr(1)
9634 .nr(16)
9635 .kr(1)
9636 .sr(1)
9637 .m(1)
9638 .n(16)
9639 .k(8)
9640 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009641 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009642 }
9643
9644 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8_subtile) {
9645 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009646 for (uint32_t n = 1; n <= 16; n++) {
9647 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009648 GemmMicrokernelTester()
9649 .mr(1)
9650 .nr(16)
9651 .kr(1)
9652 .sr(1)
9653 .m(m)
9654 .n(n)
9655 .k(8)
9656 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009657 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009658 }
9659 }
9660 }
9661
9662 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
9663 TEST_REQUIRES_ARM_NEON_V8;
9664 for (uint32_t m = 1; m <= 1; m++) {
9665 GemmMicrokernelTester()
9666 .mr(1)
9667 .nr(16)
9668 .kr(1)
9669 .sr(1)
9670 .m(m)
9671 .n(16)
9672 .k(8)
9673 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009674 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009675 }
9676 }
9677
9678 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
9679 TEST_REQUIRES_ARM_NEON_V8;
9680 for (uint32_t n = 1; n <= 16; n++) {
9681 GemmMicrokernelTester()
9682 .mr(1)
9683 .nr(16)
9684 .kr(1)
9685 .sr(1)
9686 .m(1)
9687 .n(n)
9688 .k(8)
9689 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009690 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009691 }
9692 }
9693
9694 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_lt_8) {
9695 TEST_REQUIRES_ARM_NEON_V8;
9696 for (size_t k = 1; k < 8; k++) {
9697 GemmMicrokernelTester()
9698 .mr(1)
9699 .nr(16)
9700 .kr(1)
9701 .sr(1)
9702 .m(1)
9703 .n(16)
9704 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009705 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009706 }
9707 }
9708
9709 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_lt_8_strided_a) {
9710 TEST_REQUIRES_ARM_NEON_V8;
9711 for (size_t k = 1; k < 8; k++) {
9712 GemmMicrokernelTester()
9713 .mr(1)
9714 .nr(16)
9715 .kr(1)
9716 .sr(1)
9717 .m(1)
9718 .n(16)
9719 .k(k)
9720 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009721 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009722 }
9723 }
9724
9725 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_lt_8_subtile) {
9726 TEST_REQUIRES_ARM_NEON_V8;
9727 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009728 for (uint32_t n = 1; n <= 16; n++) {
9729 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009730 GemmMicrokernelTester()
9731 .mr(1)
9732 .nr(16)
9733 .kr(1)
9734 .sr(1)
9735 .m(m)
9736 .n(n)
9737 .k(k)
9738 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009739 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009740 }
9741 }
9742 }
9743 }
9744
9745 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_gt_8) {
9746 TEST_REQUIRES_ARM_NEON_V8;
9747 for (size_t k = 9; k < 16; k++) {
9748 GemmMicrokernelTester()
9749 .mr(1)
9750 .nr(16)
9751 .kr(1)
9752 .sr(1)
9753 .m(1)
9754 .n(16)
9755 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009756 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009757 }
9758 }
9759
9760 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_gt_8_strided_a) {
9761 TEST_REQUIRES_ARM_NEON_V8;
9762 for (size_t k = 9; k < 16; k++) {
9763 GemmMicrokernelTester()
9764 .mr(1)
9765 .nr(16)
9766 .kr(1)
9767 .sr(1)
9768 .m(1)
9769 .n(16)
9770 .k(k)
9771 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009772 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009773 }
9774 }
9775
9776 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_gt_8_subtile) {
9777 TEST_REQUIRES_ARM_NEON_V8;
9778 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009779 for (uint32_t n = 1; n <= 16; n++) {
9780 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009781 GemmMicrokernelTester()
9782 .mr(1)
9783 .nr(16)
9784 .kr(1)
9785 .sr(1)
9786 .m(m)
9787 .n(n)
9788 .k(k)
9789 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009790 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009791 }
9792 }
9793 }
9794 }
9795
9796 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_div_8) {
9797 TEST_REQUIRES_ARM_NEON_V8;
9798 for (size_t k = 16; k <= 80; k += 8) {
9799 GemmMicrokernelTester()
9800 .mr(1)
9801 .nr(16)
9802 .kr(1)
9803 .sr(1)
9804 .m(1)
9805 .n(16)
9806 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009807 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009808 }
9809 }
9810
9811 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_div_8_strided_a) {
9812 TEST_REQUIRES_ARM_NEON_V8;
9813 for (size_t k = 16; k <= 80; k += 8) {
9814 GemmMicrokernelTester()
9815 .mr(1)
9816 .nr(16)
9817 .kr(1)
9818 .sr(1)
9819 .m(1)
9820 .n(16)
9821 .k(k)
9822 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08009823 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009824 }
9825 }
9826
9827 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_div_8_subtile) {
9828 TEST_REQUIRES_ARM_NEON_V8;
9829 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009830 for (uint32_t n = 1; n <= 16; n++) {
9831 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009832 GemmMicrokernelTester()
9833 .mr(1)
9834 .nr(16)
9835 .kr(1)
9836 .sr(1)
9837 .m(m)
9838 .n(n)
9839 .k(k)
9840 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009841 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009842 }
9843 }
9844 }
9845 }
9846
9847 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16) {
9848 TEST_REQUIRES_ARM_NEON_V8;
9849 for (uint32_t n = 17; n < 32; n++) {
9850 for (size_t k = 1; k <= 40; k += 9) {
9851 GemmMicrokernelTester()
9852 .mr(1)
9853 .nr(16)
9854 .kr(1)
9855 .sr(1)
9856 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009857 .n(n)
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009858 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009859 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009860 }
9861 }
9862 }
9863
9864 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16_strided_cn) {
9865 TEST_REQUIRES_ARM_NEON_V8;
9866 for (uint32_t n = 17; n < 32; n++) {
9867 for (size_t k = 1; k <= 40; k += 9) {
9868 GemmMicrokernelTester()
9869 .mr(1)
9870 .nr(16)
9871 .kr(1)
9872 .sr(1)
9873 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009874 .n(n)
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009875 .k(k)
9876 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009877 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009878 }
9879 }
9880 }
9881
9882 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16_strided_a) {
9883 TEST_REQUIRES_ARM_NEON_V8;
9884 for (uint32_t n = 17; n < 32; n++) {
9885 for (size_t k = 1; k <= 40; k += 9) {
9886 GemmMicrokernelTester()
9887 .mr(1)
9888 .nr(16)
9889 .kr(1)
9890 .sr(1)
9891 .m(1)
9892 .n(n)
9893 .k(k)
9894 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08009895 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009896 }
9897 }
9898 }
9899
9900 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16_subtile) {
9901 TEST_REQUIRES_ARM_NEON_V8;
9902 for (uint32_t n = 17; n < 32; n++) {
9903 for (size_t k = 1; k <= 40; k += 9) {
9904 for (uint32_t m = 1; m <= 1; m++) {
9905 GemmMicrokernelTester()
9906 .mr(1)
9907 .nr(16)
9908 .kr(1)
9909 .sr(1)
9910 .m(m)
9911 .n(n)
9912 .k(k)
9913 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009914 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009915 }
9916 }
9917 }
9918 }
9919
9920 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16) {
9921 TEST_REQUIRES_ARM_NEON_V8;
9922 for (uint32_t n = 32; n <= 48; n += 16) {
9923 for (size_t k = 1; k <= 40; k += 9) {
9924 GemmMicrokernelTester()
9925 .mr(1)
9926 .nr(16)
9927 .kr(1)
9928 .sr(1)
9929 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009930 .n(n)
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009931 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009932 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009933 }
9934 }
9935 }
9936
9937 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16_strided_cn) {
9938 TEST_REQUIRES_ARM_NEON_V8;
9939 for (uint32_t n = 32; n <= 48; n += 16) {
9940 for (size_t k = 1; k <= 40; k += 9) {
9941 GemmMicrokernelTester()
9942 .mr(1)
9943 .nr(16)
9944 .kr(1)
9945 .sr(1)
9946 .m(1)
9947 .n(n)
9948 .k(k)
9949 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009950 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009951 }
9952 }
9953 }
9954
9955 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16_strided_a) {
9956 TEST_REQUIRES_ARM_NEON_V8;
9957 for (uint32_t n = 32; n <= 48; n += 16) {
9958 for (size_t k = 1; k <= 40; k += 9) {
9959 GemmMicrokernelTester()
9960 .mr(1)
9961 .nr(16)
9962 .kr(1)
9963 .sr(1)
9964 .m(1)
9965 .n(n)
9966 .k(k)
9967 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08009968 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009969 }
9970 }
9971 }
9972
9973 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16_subtile) {
9974 TEST_REQUIRES_ARM_NEON_V8;
9975 for (uint32_t n = 32; n <= 48; n += 16) {
9976 for (size_t k = 1; k <= 40; k += 9) {
9977 for (uint32_t m = 1; m <= 1; m++) {
9978 GemmMicrokernelTester()
9979 .mr(1)
9980 .nr(16)
9981 .kr(1)
9982 .sr(1)
9983 .m(m)
9984 .n(n)
9985 .k(k)
9986 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009987 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009988 }
9989 }
9990 }
9991 }
9992
9993 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, strided_cm_subtile) {
9994 TEST_REQUIRES_ARM_NEON_V8;
9995 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009996 for (uint32_t n = 1; n <= 16; n++) {
9997 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -07009998 GemmMicrokernelTester()
9999 .mr(1)
10000 .nr(16)
10001 .kr(1)
10002 .sr(1)
10003 .m(m)
10004 .n(n)
10005 .k(k)
10006 .cm_stride(19)
10007 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010008 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010009 }
10010 }
10011 }
10012 }
10013
10014 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, qmin) {
10015 TEST_REQUIRES_ARM_NEON_V8;
10016 GemmMicrokernelTester()
10017 .mr(1)
10018 .nr(16)
10019 .kr(1)
10020 .sr(1)
10021 .m(1)
10022 .n(16)
10023 .k(8)
10024 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010025 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010026 }
10027
10028 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, qmax) {
10029 TEST_REQUIRES_ARM_NEON_V8;
10030 GemmMicrokernelTester()
10031 .mr(1)
10032 .nr(16)
10033 .kr(1)
10034 .sr(1)
10035 .m(1)
10036 .n(16)
10037 .k(8)
10038 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010039 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010040 }
10041
10042 TEST(QS8_GEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, strided_cm) {
10043 TEST_REQUIRES_ARM_NEON_V8;
10044 GemmMicrokernelTester()
10045 .mr(1)
10046 .nr(16)
10047 .kr(1)
10048 .sr(1)
10049 .m(1)
10050 .n(16)
10051 .k(8)
10052 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010053 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010054 }
10055#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10056
10057
10058#if XNN_ARCH_ARM || XNN_ARCH_ARM64
10059 TEST(QS8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8) {
10060 TEST_REQUIRES_ARM_NEON_V8;
10061 GemmMicrokernelTester()
10062 .mr(4)
10063 .nr(16)
10064 .kr(1)
10065 .sr(1)
10066 .m(4)
10067 .n(16)
10068 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080010069 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010070 }
10071
10072 TEST(QS8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, strided_cn) {
10073 TEST_REQUIRES_ARM_NEON_V8;
10074 GemmMicrokernelTester()
10075 .mr(4)
10076 .nr(16)
10077 .kr(1)
10078 .sr(1)
10079 .m(4)
10080 .n(16)
10081 .k(8)
10082 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010083 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010084 }
10085
10086 TEST(QS8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_strided_a) {
10087 TEST_REQUIRES_ARM_NEON_V8;
10088 GemmMicrokernelTester()
10089 .mr(4)
10090 .nr(16)
10091 .kr(1)
10092 .sr(1)
10093 .m(4)
10094 .n(16)
10095 .k(8)
10096 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010097 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010098 }
10099
10100 TEST(QS8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_subtile) {
10101 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010102 for (uint32_t n = 1; n <= 16; n++) {
10103 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010104 GemmMicrokernelTester()
10105 .mr(4)
10106 .nr(16)
10107 .kr(1)
10108 .sr(1)
10109 .m(m)
10110 .n(n)
10111 .k(8)
10112 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010113 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010114 }
10115 }
10116 }
10117
10118 TEST(QS8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
10119 TEST_REQUIRES_ARM_NEON_V8;
10120 for (uint32_t m = 1; m <= 4; m++) {
10121 GemmMicrokernelTester()
10122 .mr(4)
10123 .nr(16)
10124 .kr(1)
10125 .sr(1)
10126 .m(m)
10127 .n(16)
10128 .k(8)
10129 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010130 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010131 }
10132 }
10133
10134 TEST(QS8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
10135 TEST_REQUIRES_ARM_NEON_V8;
10136 for (uint32_t n = 1; n <= 16; n++) {
10137 GemmMicrokernelTester()
10138 .mr(4)
10139 .nr(16)
10140 .kr(1)
10141 .sr(1)
10142 .m(4)
10143 .n(n)
10144 .k(8)
10145 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010146 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010147 }
10148 }
10149
10150 TEST(QS8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_lt_8) {
10151 TEST_REQUIRES_ARM_NEON_V8;
10152 for (size_t k = 1; k < 8; k++) {
10153 GemmMicrokernelTester()
10154 .mr(4)
10155 .nr(16)
10156 .kr(1)
10157 .sr(1)
10158 .m(4)
10159 .n(16)
10160 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010161 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010162 }
10163 }
10164
10165 TEST(QS8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_lt_8_strided_a) {
10166 TEST_REQUIRES_ARM_NEON_V8;
10167 for (size_t k = 1; k < 8; k++) {
10168 GemmMicrokernelTester()
10169 .mr(4)
10170 .nr(16)
10171 .kr(1)
10172 .sr(1)
10173 .m(4)
10174 .n(16)
10175 .k(k)
10176 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010177 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010178 }
10179 }
10180
10181 TEST(QS8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_lt_8_subtile) {
10182 TEST_REQUIRES_ARM_NEON_V8;
10183 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010184 for (uint32_t n = 1; n <= 16; n++) {
10185 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010186 GemmMicrokernelTester()
10187 .mr(4)
10188 .nr(16)
10189 .kr(1)
10190 .sr(1)
10191 .m(m)
10192 .n(n)
10193 .k(k)
10194 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010195 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010196 }
10197 }
10198 }
10199 }
10200
10201 TEST(QS8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_gt_8) {
10202 TEST_REQUIRES_ARM_NEON_V8;
10203 for (size_t k = 9; k < 16; k++) {
10204 GemmMicrokernelTester()
10205 .mr(4)
10206 .nr(16)
10207 .kr(1)
10208 .sr(1)
10209 .m(4)
10210 .n(16)
10211 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010212 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010213 }
10214 }
10215
10216 TEST(QS8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_gt_8_strided_a) {
10217 TEST_REQUIRES_ARM_NEON_V8;
10218 for (size_t k = 9; k < 16; k++) {
10219 GemmMicrokernelTester()
10220 .mr(4)
10221 .nr(16)
10222 .kr(1)
10223 .sr(1)
10224 .m(4)
10225 .n(16)
10226 .k(k)
10227 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010228 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010229 }
10230 }
10231
10232 TEST(QS8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_gt_8_subtile) {
10233 TEST_REQUIRES_ARM_NEON_V8;
10234 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010235 for (uint32_t n = 1; n <= 16; n++) {
10236 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010237 GemmMicrokernelTester()
10238 .mr(4)
10239 .nr(16)
10240 .kr(1)
10241 .sr(1)
10242 .m(m)
10243 .n(n)
10244 .k(k)
10245 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010246 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010247 }
10248 }
10249 }
10250 }
10251
10252 TEST(QS8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_div_8) {
10253 TEST_REQUIRES_ARM_NEON_V8;
10254 for (size_t k = 16; k <= 80; k += 8) {
10255 GemmMicrokernelTester()
10256 .mr(4)
10257 .nr(16)
10258 .kr(1)
10259 .sr(1)
10260 .m(4)
10261 .n(16)
10262 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010263 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010264 }
10265 }
10266
10267 TEST(QS8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_div_8_strided_a) {
10268 TEST_REQUIRES_ARM_NEON_V8;
10269 for (size_t k = 16; k <= 80; k += 8) {
10270 GemmMicrokernelTester()
10271 .mr(4)
10272 .nr(16)
10273 .kr(1)
10274 .sr(1)
10275 .m(4)
10276 .n(16)
10277 .k(k)
10278 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080010279 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010280 }
10281 }
10282
10283 TEST(QS8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_div_8_subtile) {
10284 TEST_REQUIRES_ARM_NEON_V8;
10285 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010286 for (uint32_t n = 1; n <= 16; n++) {
10287 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010288 GemmMicrokernelTester()
10289 .mr(4)
10290 .nr(16)
10291 .kr(1)
10292 .sr(1)
10293 .m(m)
10294 .n(n)
10295 .k(k)
10296 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010297 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010298 }
10299 }
10300 }
10301 }
10302
10303 TEST(QS8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16) {
10304 TEST_REQUIRES_ARM_NEON_V8;
10305 for (uint32_t n = 17; n < 32; n++) {
10306 for (size_t k = 1; k <= 40; k += 9) {
10307 GemmMicrokernelTester()
10308 .mr(4)
10309 .nr(16)
10310 .kr(1)
10311 .sr(1)
10312 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010313 .n(n)
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010314 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010315 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010316 }
10317 }
10318 }
10319
10320 TEST(QS8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16_strided_cn) {
10321 TEST_REQUIRES_ARM_NEON_V8;
10322 for (uint32_t n = 17; n < 32; n++) {
10323 for (size_t k = 1; k <= 40; k += 9) {
10324 GemmMicrokernelTester()
10325 .mr(4)
10326 .nr(16)
10327 .kr(1)
10328 .sr(1)
10329 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010330 .n(n)
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010331 .k(k)
10332 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010333 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010334 }
10335 }
10336 }
10337
10338 TEST(QS8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16_strided_a) {
10339 TEST_REQUIRES_ARM_NEON_V8;
10340 for (uint32_t n = 17; n < 32; n++) {
10341 for (size_t k = 1; k <= 40; k += 9) {
10342 GemmMicrokernelTester()
10343 .mr(4)
10344 .nr(16)
10345 .kr(1)
10346 .sr(1)
10347 .m(4)
10348 .n(n)
10349 .k(k)
10350 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080010351 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010352 }
10353 }
10354 }
10355
10356 TEST(QS8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16_subtile) {
10357 TEST_REQUIRES_ARM_NEON_V8;
10358 for (uint32_t n = 17; n < 32; n++) {
10359 for (size_t k = 1; k <= 40; k += 9) {
10360 for (uint32_t m = 1; m <= 4; m++) {
10361 GemmMicrokernelTester()
10362 .mr(4)
10363 .nr(16)
10364 .kr(1)
10365 .sr(1)
10366 .m(m)
10367 .n(n)
10368 .k(k)
10369 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010370 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010371 }
10372 }
10373 }
10374 }
10375
10376 TEST(QS8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16) {
10377 TEST_REQUIRES_ARM_NEON_V8;
10378 for (uint32_t n = 32; n <= 48; n += 16) {
10379 for (size_t k = 1; k <= 40; k += 9) {
10380 GemmMicrokernelTester()
10381 .mr(4)
10382 .nr(16)
10383 .kr(1)
10384 .sr(1)
10385 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010386 .n(n)
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010387 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010388 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010389 }
10390 }
10391 }
10392
10393 TEST(QS8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16_strided_cn) {
10394 TEST_REQUIRES_ARM_NEON_V8;
10395 for (uint32_t n = 32; n <= 48; n += 16) {
10396 for (size_t k = 1; k <= 40; k += 9) {
10397 GemmMicrokernelTester()
10398 .mr(4)
10399 .nr(16)
10400 .kr(1)
10401 .sr(1)
10402 .m(4)
10403 .n(n)
10404 .k(k)
10405 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010406 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010407 }
10408 }
10409 }
10410
10411 TEST(QS8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16_strided_a) {
10412 TEST_REQUIRES_ARM_NEON_V8;
10413 for (uint32_t n = 32; n <= 48; n += 16) {
10414 for (size_t k = 1; k <= 40; k += 9) {
10415 GemmMicrokernelTester()
10416 .mr(4)
10417 .nr(16)
10418 .kr(1)
10419 .sr(1)
10420 .m(4)
10421 .n(n)
10422 .k(k)
10423 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080010424 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010425 }
10426 }
10427 }
10428
10429 TEST(QS8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16_subtile) {
10430 TEST_REQUIRES_ARM_NEON_V8;
10431 for (uint32_t n = 32; n <= 48; n += 16) {
10432 for (size_t k = 1; k <= 40; k += 9) {
10433 for (uint32_t m = 1; m <= 4; m++) {
10434 GemmMicrokernelTester()
10435 .mr(4)
10436 .nr(16)
10437 .kr(1)
10438 .sr(1)
10439 .m(m)
10440 .n(n)
10441 .k(k)
10442 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010443 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010444 }
10445 }
10446 }
10447 }
10448
10449 TEST(QS8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, strided_cm_subtile) {
10450 TEST_REQUIRES_ARM_NEON_V8;
10451 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010452 for (uint32_t n = 1; n <= 16; n++) {
10453 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010454 GemmMicrokernelTester()
10455 .mr(4)
10456 .nr(16)
10457 .kr(1)
10458 .sr(1)
10459 .m(m)
10460 .n(n)
10461 .k(k)
10462 .cm_stride(19)
10463 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010464 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010465 }
10466 }
10467 }
10468 }
10469
10470 TEST(QS8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, qmin) {
10471 TEST_REQUIRES_ARM_NEON_V8;
10472 GemmMicrokernelTester()
10473 .mr(4)
10474 .nr(16)
10475 .kr(1)
10476 .sr(1)
10477 .m(4)
10478 .n(16)
10479 .k(8)
10480 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010481 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010482 }
10483
10484 TEST(QS8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, qmax) {
10485 TEST_REQUIRES_ARM_NEON_V8;
10486 GemmMicrokernelTester()
10487 .mr(4)
10488 .nr(16)
10489 .kr(1)
10490 .sr(1)
10491 .m(4)
10492 .n(16)
10493 .k(8)
10494 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010495 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010496 }
10497
10498 TEST(QS8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, strided_cm) {
10499 TEST_REQUIRES_ARM_NEON_V8;
10500 GemmMicrokernelTester()
10501 .mr(4)
10502 .nr(16)
10503 .kr(1)
10504 .sr(1)
10505 .m(4)
10506 .n(16)
10507 .k(8)
10508 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010509 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010510 }
10511#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10512
10513
10514#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharde22685a2021-11-12 11:36:58 -080010515 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_eq_16) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010516 TEST_REQUIRES_ARM_NEON;
10517 GemmMicrokernelTester()
10518 .mr(1)
10519 .nr(8)
10520 .kr(8)
10521 .sr(1)
10522 .m(1)
10523 .n(8)
10524 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080010525 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010526 }
10527
Frank Barcharde22685a2021-11-12 11:36:58 -080010528 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, strided_cn) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010529 TEST_REQUIRES_ARM_NEON;
10530 GemmMicrokernelTester()
10531 .mr(1)
10532 .nr(8)
10533 .kr(8)
10534 .sr(1)
10535 .m(1)
10536 .n(8)
10537 .k(16)
10538 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010539 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010540 }
10541
Frank Barcharde22685a2021-11-12 11:36:58 -080010542 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_eq_16_strided_a) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010543 TEST_REQUIRES_ARM_NEON;
10544 GemmMicrokernelTester()
10545 .mr(1)
10546 .nr(8)
10547 .kr(8)
10548 .sr(1)
10549 .m(1)
10550 .n(8)
10551 .k(16)
10552 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010553 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010554 }
10555
Frank Barcharde22685a2021-11-12 11:36:58 -080010556 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_eq_16_subtile) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010557 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010558 for (uint32_t n = 1; n <= 8; n++) {
10559 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010560 GemmMicrokernelTester()
10561 .mr(1)
10562 .nr(8)
10563 .kr(8)
10564 .sr(1)
10565 .m(m)
10566 .n(n)
10567 .k(16)
10568 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010569 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010570 }
10571 }
10572 }
10573
Frank Barcharde22685a2021-11-12 11:36:58 -080010574 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_eq_16_subtile_m) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010575 TEST_REQUIRES_ARM_NEON;
10576 for (uint32_t m = 1; m <= 1; m++) {
10577 GemmMicrokernelTester()
10578 .mr(1)
10579 .nr(8)
10580 .kr(8)
10581 .sr(1)
10582 .m(m)
10583 .n(8)
10584 .k(16)
10585 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010586 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010587 }
10588 }
10589
Frank Barcharde22685a2021-11-12 11:36:58 -080010590 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_eq_16_subtile_n) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010591 TEST_REQUIRES_ARM_NEON;
10592 for (uint32_t n = 1; n <= 8; n++) {
10593 GemmMicrokernelTester()
10594 .mr(1)
10595 .nr(8)
10596 .kr(8)
10597 .sr(1)
10598 .m(1)
10599 .n(n)
10600 .k(16)
10601 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010602 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010603 }
10604 }
10605
Frank Barcharde22685a2021-11-12 11:36:58 -080010606 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_lt_16) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010607 TEST_REQUIRES_ARM_NEON;
10608 for (size_t k = 1; k < 16; k++) {
10609 GemmMicrokernelTester()
10610 .mr(1)
10611 .nr(8)
10612 .kr(8)
10613 .sr(1)
10614 .m(1)
10615 .n(8)
10616 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010617 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010618 }
10619 }
10620
Frank Barcharde22685a2021-11-12 11:36:58 -080010621 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_lt_16_strided_a) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010622 TEST_REQUIRES_ARM_NEON;
10623 for (size_t k = 1; k < 16; k++) {
10624 GemmMicrokernelTester()
10625 .mr(1)
10626 .nr(8)
10627 .kr(8)
10628 .sr(1)
10629 .m(1)
10630 .n(8)
10631 .k(k)
10632 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010633 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010634 }
10635 }
10636
Frank Barcharde22685a2021-11-12 11:36:58 -080010637 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_lt_16_subtile) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010638 TEST_REQUIRES_ARM_NEON;
10639 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010640 for (uint32_t n = 1; n <= 8; n++) {
10641 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010642 GemmMicrokernelTester()
10643 .mr(1)
10644 .nr(8)
10645 .kr(8)
10646 .sr(1)
10647 .m(m)
10648 .n(n)
10649 .k(k)
10650 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010651 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010652 }
10653 }
10654 }
10655 }
10656
Frank Barcharde22685a2021-11-12 11:36:58 -080010657 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_gt_16) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010658 TEST_REQUIRES_ARM_NEON;
10659 for (size_t k = 17; k < 32; k++) {
10660 GemmMicrokernelTester()
10661 .mr(1)
10662 .nr(8)
10663 .kr(8)
10664 .sr(1)
10665 .m(1)
10666 .n(8)
10667 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010668 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010669 }
10670 }
10671
Frank Barcharde22685a2021-11-12 11:36:58 -080010672 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_gt_16_strided_a) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010673 TEST_REQUIRES_ARM_NEON;
10674 for (size_t k = 17; k < 32; k++) {
10675 GemmMicrokernelTester()
10676 .mr(1)
10677 .nr(8)
10678 .kr(8)
10679 .sr(1)
10680 .m(1)
10681 .n(8)
10682 .k(k)
10683 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080010684 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010685 }
10686 }
10687
Frank Barcharde22685a2021-11-12 11:36:58 -080010688 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_gt_16_subtile) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010689 TEST_REQUIRES_ARM_NEON;
10690 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010691 for (uint32_t n = 1; n <= 8; n++) {
10692 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010693 GemmMicrokernelTester()
10694 .mr(1)
10695 .nr(8)
10696 .kr(8)
10697 .sr(1)
10698 .m(m)
10699 .n(n)
10700 .k(k)
10701 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010702 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010703 }
10704 }
10705 }
10706 }
10707
Frank Barcharde22685a2021-11-12 11:36:58 -080010708 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_div_16) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010709 TEST_REQUIRES_ARM_NEON;
10710 for (size_t k = 32; k <= 160; k += 16) {
10711 GemmMicrokernelTester()
10712 .mr(1)
10713 .nr(8)
10714 .kr(8)
10715 .sr(1)
10716 .m(1)
10717 .n(8)
10718 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010719 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010720 }
10721 }
10722
Frank Barcharde22685a2021-11-12 11:36:58 -080010723 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_div_16_strided_a) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010724 TEST_REQUIRES_ARM_NEON;
10725 for (size_t k = 32; k <= 160; k += 16) {
10726 GemmMicrokernelTester()
10727 .mr(1)
10728 .nr(8)
10729 .kr(8)
10730 .sr(1)
10731 .m(1)
10732 .n(8)
10733 .k(k)
10734 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080010735 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010736 }
10737 }
10738
Frank Barcharde22685a2021-11-12 11:36:58 -080010739 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_div_16_subtile) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010740 TEST_REQUIRES_ARM_NEON;
10741 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010742 for (uint32_t n = 1; n <= 8; n++) {
10743 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010744 GemmMicrokernelTester()
10745 .mr(1)
10746 .nr(8)
10747 .kr(8)
10748 .sr(1)
10749 .m(m)
10750 .n(n)
10751 .k(k)
10752 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010753 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010754 }
10755 }
10756 }
10757 }
10758
Frank Barcharde22685a2021-11-12 11:36:58 -080010759 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_gt_8) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010760 TEST_REQUIRES_ARM_NEON;
10761 for (uint32_t n = 9; n < 16; n++) {
10762 for (size_t k = 1; k <= 80; k += 17) {
10763 GemmMicrokernelTester()
10764 .mr(1)
10765 .nr(8)
10766 .kr(8)
10767 .sr(1)
10768 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010769 .n(n)
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010770 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010771 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010772 }
10773 }
10774 }
10775
Frank Barcharde22685a2021-11-12 11:36:58 -080010776 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_gt_8_strided_cn) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010777 TEST_REQUIRES_ARM_NEON;
10778 for (uint32_t n = 9; n < 16; n++) {
10779 for (size_t k = 1; k <= 80; k += 17) {
10780 GemmMicrokernelTester()
10781 .mr(1)
10782 .nr(8)
10783 .kr(8)
10784 .sr(1)
10785 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010786 .n(n)
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010787 .k(k)
10788 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010789 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010790 }
10791 }
10792 }
10793
Frank Barcharde22685a2021-11-12 11:36:58 -080010794 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_gt_8_strided_a) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010795 TEST_REQUIRES_ARM_NEON;
10796 for (uint32_t n = 9; n < 16; n++) {
10797 for (size_t k = 1; k <= 80; k += 17) {
10798 GemmMicrokernelTester()
10799 .mr(1)
10800 .nr(8)
10801 .kr(8)
10802 .sr(1)
10803 .m(1)
10804 .n(n)
10805 .k(k)
10806 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080010807 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010808 }
10809 }
10810 }
10811
Frank Barcharde22685a2021-11-12 11:36:58 -080010812 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_gt_8_subtile) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010813 TEST_REQUIRES_ARM_NEON;
10814 for (uint32_t n = 9; n < 16; n++) {
10815 for (size_t k = 1; k <= 80; k += 17) {
10816 for (uint32_t m = 1; m <= 1; m++) {
10817 GemmMicrokernelTester()
10818 .mr(1)
10819 .nr(8)
10820 .kr(8)
10821 .sr(1)
10822 .m(m)
10823 .n(n)
10824 .k(k)
10825 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010826 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010827 }
10828 }
10829 }
10830 }
10831
Frank Barcharde22685a2021-11-12 11:36:58 -080010832 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_div_8) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010833 TEST_REQUIRES_ARM_NEON;
10834 for (uint32_t n = 16; n <= 24; n += 8) {
10835 for (size_t k = 1; k <= 80; k += 17) {
10836 GemmMicrokernelTester()
10837 .mr(1)
10838 .nr(8)
10839 .kr(8)
10840 .sr(1)
10841 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010842 .n(n)
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010843 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010844 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010845 }
10846 }
10847 }
10848
Frank Barcharde22685a2021-11-12 11:36:58 -080010849 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_div_8_strided_cn) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010850 TEST_REQUIRES_ARM_NEON;
10851 for (uint32_t n = 16; n <= 24; n += 8) {
10852 for (size_t k = 1; k <= 80; k += 17) {
10853 GemmMicrokernelTester()
10854 .mr(1)
10855 .nr(8)
10856 .kr(8)
10857 .sr(1)
10858 .m(1)
10859 .n(n)
10860 .k(k)
10861 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010862 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010863 }
10864 }
10865 }
10866
Frank Barcharde22685a2021-11-12 11:36:58 -080010867 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_div_8_strided_a) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010868 TEST_REQUIRES_ARM_NEON;
10869 for (uint32_t n = 16; n <= 24; n += 8) {
10870 for (size_t k = 1; k <= 80; k += 17) {
10871 GemmMicrokernelTester()
10872 .mr(1)
10873 .nr(8)
10874 .kr(8)
10875 .sr(1)
10876 .m(1)
10877 .n(n)
10878 .k(k)
10879 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080010880 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010881 }
10882 }
10883 }
10884
Frank Barcharde22685a2021-11-12 11:36:58 -080010885 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_div_8_subtile) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010886 TEST_REQUIRES_ARM_NEON;
10887 for (uint32_t n = 16; n <= 24; n += 8) {
10888 for (size_t k = 1; k <= 80; k += 17) {
10889 for (uint32_t m = 1; m <= 1; m++) {
10890 GemmMicrokernelTester()
10891 .mr(1)
10892 .nr(8)
10893 .kr(8)
10894 .sr(1)
10895 .m(m)
10896 .n(n)
10897 .k(k)
10898 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010899 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010900 }
10901 }
10902 }
10903 }
10904
Frank Barcharde22685a2021-11-12 11:36:58 -080010905 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, strided_cm_subtile) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010906 TEST_REQUIRES_ARM_NEON;
10907 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010908 for (uint32_t n = 1; n <= 8; n++) {
10909 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010910 GemmMicrokernelTester()
10911 .mr(1)
10912 .nr(8)
10913 .kr(8)
10914 .sr(1)
10915 .m(m)
10916 .n(n)
10917 .k(k)
10918 .cm_stride(11)
10919 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010920 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010921 }
10922 }
10923 }
10924 }
10925
Frank Barcharde22685a2021-11-12 11:36:58 -080010926 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, qmin) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010927 TEST_REQUIRES_ARM_NEON;
10928 GemmMicrokernelTester()
10929 .mr(1)
10930 .nr(8)
10931 .kr(8)
10932 .sr(1)
10933 .m(1)
10934 .n(8)
10935 .k(16)
10936 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010937 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010938 }
10939
Frank Barcharde22685a2021-11-12 11:36:58 -080010940 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, qmax) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010941 TEST_REQUIRES_ARM_NEON;
10942 GemmMicrokernelTester()
10943 .mr(1)
10944 .nr(8)
10945 .kr(8)
10946 .sr(1)
10947 .m(1)
10948 .n(8)
10949 .k(16)
10950 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010951 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010952 }
10953
Frank Barcharde22685a2021-11-12 11:36:58 -080010954 TEST(QS8_GEMM_MINMAX_FP32_1X8C8__NEON_MLAL, strided_cm) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010955 TEST_REQUIRES_ARM_NEON;
10956 GemmMicrokernelTester()
10957 .mr(1)
10958 .nr(8)
10959 .kr(8)
10960 .sr(1)
10961 .m(1)
10962 .n(8)
10963 .k(16)
10964 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010965 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010966 }
10967#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10968
10969
10970#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharde22685a2021-11-12 11:36:58 -080010971 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_eq_16) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010972 TEST_REQUIRES_ARM_NEON;
10973 GemmMicrokernelTester()
10974 .mr(2)
10975 .nr(8)
10976 .kr(8)
10977 .sr(1)
10978 .m(2)
10979 .n(8)
10980 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080010981 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010982 }
10983
Frank Barcharde22685a2021-11-12 11:36:58 -080010984 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, strided_cn) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010985 TEST_REQUIRES_ARM_NEON;
10986 GemmMicrokernelTester()
10987 .mr(2)
10988 .nr(8)
10989 .kr(8)
10990 .sr(1)
10991 .m(2)
10992 .n(8)
10993 .k(16)
10994 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010995 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010996 }
10997
Frank Barcharde22685a2021-11-12 11:36:58 -080010998 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_eq_16_strided_a) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070010999 TEST_REQUIRES_ARM_NEON;
11000 GemmMicrokernelTester()
11001 .mr(2)
11002 .nr(8)
11003 .kr(8)
11004 .sr(1)
11005 .m(2)
11006 .n(8)
11007 .k(16)
11008 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080011009 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011010 }
11011
Frank Barcharde22685a2021-11-12 11:36:58 -080011012 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_eq_16_subtile) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011013 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011014 for (uint32_t n = 1; n <= 8; n++) {
11015 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011016 GemmMicrokernelTester()
11017 .mr(2)
11018 .nr(8)
11019 .kr(8)
11020 .sr(1)
11021 .m(m)
11022 .n(n)
11023 .k(16)
11024 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011025 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011026 }
11027 }
11028 }
11029
Frank Barcharde22685a2021-11-12 11:36:58 -080011030 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_eq_16_subtile_m) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011031 TEST_REQUIRES_ARM_NEON;
11032 for (uint32_t m = 1; m <= 2; m++) {
11033 GemmMicrokernelTester()
11034 .mr(2)
11035 .nr(8)
11036 .kr(8)
11037 .sr(1)
11038 .m(m)
11039 .n(8)
11040 .k(16)
11041 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011042 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011043 }
11044 }
11045
Frank Barcharde22685a2021-11-12 11:36:58 -080011046 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_eq_16_subtile_n) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011047 TEST_REQUIRES_ARM_NEON;
11048 for (uint32_t n = 1; n <= 8; n++) {
11049 GemmMicrokernelTester()
11050 .mr(2)
11051 .nr(8)
11052 .kr(8)
11053 .sr(1)
11054 .m(2)
11055 .n(n)
11056 .k(16)
11057 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011058 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011059 }
11060 }
11061
Frank Barcharde22685a2021-11-12 11:36:58 -080011062 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_lt_16) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011063 TEST_REQUIRES_ARM_NEON;
11064 for (size_t k = 1; k < 16; k++) {
11065 GemmMicrokernelTester()
11066 .mr(2)
11067 .nr(8)
11068 .kr(8)
11069 .sr(1)
11070 .m(2)
11071 .n(8)
11072 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011073 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011074 }
11075 }
11076
Frank Barcharde22685a2021-11-12 11:36:58 -080011077 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_lt_16_strided_a) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011078 TEST_REQUIRES_ARM_NEON;
11079 for (size_t k = 1; k < 16; k++) {
11080 GemmMicrokernelTester()
11081 .mr(2)
11082 .nr(8)
11083 .kr(8)
11084 .sr(1)
11085 .m(2)
11086 .n(8)
11087 .k(k)
11088 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080011089 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011090 }
11091 }
11092
Frank Barcharde22685a2021-11-12 11:36:58 -080011093 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_lt_16_subtile) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011094 TEST_REQUIRES_ARM_NEON;
11095 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011096 for (uint32_t n = 1; n <= 8; n++) {
11097 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011098 GemmMicrokernelTester()
11099 .mr(2)
11100 .nr(8)
11101 .kr(8)
11102 .sr(1)
11103 .m(m)
11104 .n(n)
11105 .k(k)
11106 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011107 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011108 }
11109 }
11110 }
11111 }
11112
Frank Barcharde22685a2021-11-12 11:36:58 -080011113 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_gt_16) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011114 TEST_REQUIRES_ARM_NEON;
11115 for (size_t k = 17; k < 32; k++) {
11116 GemmMicrokernelTester()
11117 .mr(2)
11118 .nr(8)
11119 .kr(8)
11120 .sr(1)
11121 .m(2)
11122 .n(8)
11123 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011124 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011125 }
11126 }
11127
Frank Barcharde22685a2021-11-12 11:36:58 -080011128 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_gt_16_strided_a) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011129 TEST_REQUIRES_ARM_NEON;
11130 for (size_t k = 17; k < 32; k++) {
11131 GemmMicrokernelTester()
11132 .mr(2)
11133 .nr(8)
11134 .kr(8)
11135 .sr(1)
11136 .m(2)
11137 .n(8)
11138 .k(k)
11139 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080011140 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011141 }
11142 }
11143
Frank Barcharde22685a2021-11-12 11:36:58 -080011144 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_gt_16_subtile) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011145 TEST_REQUIRES_ARM_NEON;
11146 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011147 for (uint32_t n = 1; n <= 8; n++) {
11148 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011149 GemmMicrokernelTester()
11150 .mr(2)
11151 .nr(8)
11152 .kr(8)
11153 .sr(1)
11154 .m(m)
11155 .n(n)
11156 .k(k)
11157 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011158 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011159 }
11160 }
11161 }
11162 }
11163
Frank Barcharde22685a2021-11-12 11:36:58 -080011164 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_div_16) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011165 TEST_REQUIRES_ARM_NEON;
11166 for (size_t k = 32; k <= 160; k += 16) {
11167 GemmMicrokernelTester()
11168 .mr(2)
11169 .nr(8)
11170 .kr(8)
11171 .sr(1)
11172 .m(2)
11173 .n(8)
11174 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011175 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011176 }
11177 }
11178
Frank Barcharde22685a2021-11-12 11:36:58 -080011179 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_div_16_strided_a) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011180 TEST_REQUIRES_ARM_NEON;
11181 for (size_t k = 32; k <= 160; k += 16) {
11182 GemmMicrokernelTester()
11183 .mr(2)
11184 .nr(8)
11185 .kr(8)
11186 .sr(1)
11187 .m(2)
11188 .n(8)
11189 .k(k)
11190 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080011191 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011192 }
11193 }
11194
Frank Barcharde22685a2021-11-12 11:36:58 -080011195 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_div_16_subtile) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011196 TEST_REQUIRES_ARM_NEON;
11197 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011198 for (uint32_t n = 1; n <= 8; n++) {
11199 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011200 GemmMicrokernelTester()
11201 .mr(2)
11202 .nr(8)
11203 .kr(8)
11204 .sr(1)
11205 .m(m)
11206 .n(n)
11207 .k(k)
11208 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011209 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011210 }
11211 }
11212 }
11213 }
11214
Frank Barcharde22685a2021-11-12 11:36:58 -080011215 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_gt_8) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011216 TEST_REQUIRES_ARM_NEON;
11217 for (uint32_t n = 9; n < 16; n++) {
11218 for (size_t k = 1; k <= 80; k += 17) {
11219 GemmMicrokernelTester()
11220 .mr(2)
11221 .nr(8)
11222 .kr(8)
11223 .sr(1)
11224 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011225 .n(n)
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011226 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011227 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011228 }
11229 }
11230 }
11231
Frank Barcharde22685a2021-11-12 11:36:58 -080011232 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_gt_8_strided_cn) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011233 TEST_REQUIRES_ARM_NEON;
11234 for (uint32_t n = 9; n < 16; n++) {
11235 for (size_t k = 1; k <= 80; k += 17) {
11236 GemmMicrokernelTester()
11237 .mr(2)
11238 .nr(8)
11239 .kr(8)
11240 .sr(1)
11241 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011242 .n(n)
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011243 .k(k)
11244 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011245 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011246 }
11247 }
11248 }
11249
Frank Barcharde22685a2021-11-12 11:36:58 -080011250 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_gt_8_strided_a) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011251 TEST_REQUIRES_ARM_NEON;
11252 for (uint32_t n = 9; n < 16; n++) {
11253 for (size_t k = 1; k <= 80; k += 17) {
11254 GemmMicrokernelTester()
11255 .mr(2)
11256 .nr(8)
11257 .kr(8)
11258 .sr(1)
11259 .m(2)
11260 .n(n)
11261 .k(k)
11262 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080011263 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011264 }
11265 }
11266 }
11267
Frank Barcharde22685a2021-11-12 11:36:58 -080011268 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_gt_8_subtile) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011269 TEST_REQUIRES_ARM_NEON;
11270 for (uint32_t n = 9; n < 16; n++) {
11271 for (size_t k = 1; k <= 80; k += 17) {
11272 for (uint32_t m = 1; m <= 2; m++) {
11273 GemmMicrokernelTester()
11274 .mr(2)
11275 .nr(8)
11276 .kr(8)
11277 .sr(1)
11278 .m(m)
11279 .n(n)
11280 .k(k)
11281 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011282 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011283 }
11284 }
11285 }
11286 }
11287
Frank Barcharde22685a2021-11-12 11:36:58 -080011288 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_div_8) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011289 TEST_REQUIRES_ARM_NEON;
11290 for (uint32_t n = 16; n <= 24; n += 8) {
11291 for (size_t k = 1; k <= 80; k += 17) {
11292 GemmMicrokernelTester()
11293 .mr(2)
11294 .nr(8)
11295 .kr(8)
11296 .sr(1)
11297 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011298 .n(n)
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011299 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011300 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011301 }
11302 }
11303 }
11304
Frank Barcharde22685a2021-11-12 11:36:58 -080011305 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_div_8_strided_cn) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011306 TEST_REQUIRES_ARM_NEON;
11307 for (uint32_t n = 16; n <= 24; n += 8) {
11308 for (size_t k = 1; k <= 80; k += 17) {
11309 GemmMicrokernelTester()
11310 .mr(2)
11311 .nr(8)
11312 .kr(8)
11313 .sr(1)
11314 .m(2)
11315 .n(n)
11316 .k(k)
11317 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011318 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011319 }
11320 }
11321 }
11322
Frank Barcharde22685a2021-11-12 11:36:58 -080011323 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_div_8_strided_a) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011324 TEST_REQUIRES_ARM_NEON;
11325 for (uint32_t n = 16; n <= 24; n += 8) {
11326 for (size_t k = 1; k <= 80; k += 17) {
11327 GemmMicrokernelTester()
11328 .mr(2)
11329 .nr(8)
11330 .kr(8)
11331 .sr(1)
11332 .m(2)
11333 .n(n)
11334 .k(k)
11335 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080011336 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011337 }
11338 }
11339 }
11340
Frank Barcharde22685a2021-11-12 11:36:58 -080011341 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_div_8_subtile) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011342 TEST_REQUIRES_ARM_NEON;
11343 for (uint32_t n = 16; n <= 24; n += 8) {
11344 for (size_t k = 1; k <= 80; k += 17) {
11345 for (uint32_t m = 1; m <= 2; m++) {
11346 GemmMicrokernelTester()
11347 .mr(2)
11348 .nr(8)
11349 .kr(8)
11350 .sr(1)
11351 .m(m)
11352 .n(n)
11353 .k(k)
11354 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011355 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011356 }
11357 }
11358 }
11359 }
11360
Frank Barcharde22685a2021-11-12 11:36:58 -080011361 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, strided_cm_subtile) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011362 TEST_REQUIRES_ARM_NEON;
11363 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011364 for (uint32_t n = 1; n <= 8; n++) {
11365 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011366 GemmMicrokernelTester()
11367 .mr(2)
11368 .nr(8)
11369 .kr(8)
11370 .sr(1)
11371 .m(m)
11372 .n(n)
11373 .k(k)
11374 .cm_stride(11)
11375 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011376 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011377 }
11378 }
11379 }
11380 }
11381
Frank Barcharde22685a2021-11-12 11:36:58 -080011382 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, qmin) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011383 TEST_REQUIRES_ARM_NEON;
11384 GemmMicrokernelTester()
11385 .mr(2)
11386 .nr(8)
11387 .kr(8)
11388 .sr(1)
11389 .m(2)
11390 .n(8)
11391 .k(16)
11392 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011393 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011394 }
11395
Frank Barcharde22685a2021-11-12 11:36:58 -080011396 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, qmax) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011397 TEST_REQUIRES_ARM_NEON;
11398 GemmMicrokernelTester()
11399 .mr(2)
11400 .nr(8)
11401 .kr(8)
11402 .sr(1)
11403 .m(2)
11404 .n(8)
11405 .k(16)
11406 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011407 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011408 }
11409
Frank Barcharde22685a2021-11-12 11:36:58 -080011410 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__NEON_MLAL, strided_cm) {
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011411 TEST_REQUIRES_ARM_NEON;
11412 GemmMicrokernelTester()
11413 .mr(2)
11414 .nr(8)
11415 .kr(8)
11416 .sr(1)
11417 .m(2)
11418 .n(8)
11419 .k(16)
11420 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011421 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
Marat Dukhan2d3c97c2021-06-25 18:00:28 -070011422 }
11423#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11424
11425
Marat Dukhan18630de2021-06-02 22:20:01 -070011426#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
11427 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_eq_8) {
11428 TEST_REQUIRES_ARM_NEON_DOT;
11429 GemmMicrokernelTester()
11430 .mr(1)
11431 .nr(8)
11432 .kr(4)
11433 .sr(1)
11434 .m(1)
11435 .n(8)
11436 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080011437 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070011438 }
11439
11440 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, strided_cn) {
11441 TEST_REQUIRES_ARM_NEON_DOT;
11442 GemmMicrokernelTester()
11443 .mr(1)
11444 .nr(8)
11445 .kr(4)
11446 .sr(1)
11447 .m(1)
11448 .n(8)
11449 .k(8)
11450 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011451 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070011452 }
11453
11454 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_eq_8_strided_a) {
11455 TEST_REQUIRES_ARM_NEON_DOT;
11456 GemmMicrokernelTester()
11457 .mr(1)
11458 .nr(8)
11459 .kr(4)
11460 .sr(1)
11461 .m(1)
11462 .n(8)
11463 .k(8)
11464 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011465 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070011466 }
11467
11468 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_eq_8_subtile) {
11469 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011470 for (uint32_t n = 1; n <= 8; n++) {
11471 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan18630de2021-06-02 22:20:01 -070011472 GemmMicrokernelTester()
11473 .mr(1)
11474 .nr(8)
11475 .kr(4)
11476 .sr(1)
11477 .m(m)
11478 .n(n)
11479 .k(8)
11480 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011481 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070011482 }
11483 }
11484 }
11485
11486 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_eq_8_subtile_m) {
11487 TEST_REQUIRES_ARM_NEON_DOT;
11488 for (uint32_t m = 1; m <= 1; m++) {
11489 GemmMicrokernelTester()
11490 .mr(1)
11491 .nr(8)
11492 .kr(4)
11493 .sr(1)
11494 .m(m)
11495 .n(8)
11496 .k(8)
11497 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011498 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070011499 }
11500 }
11501
11502 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_eq_8_subtile_n) {
11503 TEST_REQUIRES_ARM_NEON_DOT;
11504 for (uint32_t n = 1; n <= 8; n++) {
11505 GemmMicrokernelTester()
11506 .mr(1)
11507 .nr(8)
11508 .kr(4)
11509 .sr(1)
11510 .m(1)
11511 .n(n)
11512 .k(8)
11513 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011514 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070011515 }
11516 }
11517
11518 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_lt_8) {
11519 TEST_REQUIRES_ARM_NEON_DOT;
11520 for (size_t k = 1; k < 8; k++) {
11521 GemmMicrokernelTester()
11522 .mr(1)
11523 .nr(8)
11524 .kr(4)
11525 .sr(1)
11526 .m(1)
11527 .n(8)
11528 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011529 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070011530 }
11531 }
11532
11533 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_lt_8_strided_a) {
11534 TEST_REQUIRES_ARM_NEON_DOT;
11535 for (size_t k = 1; k < 8; k++) {
11536 GemmMicrokernelTester()
11537 .mr(1)
11538 .nr(8)
11539 .kr(4)
11540 .sr(1)
11541 .m(1)
11542 .n(8)
11543 .k(k)
11544 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011545 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070011546 }
11547 }
11548
11549 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_lt_8_subtile) {
11550 TEST_REQUIRES_ARM_NEON_DOT;
11551 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011552 for (uint32_t n = 1; n <= 8; n++) {
11553 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan18630de2021-06-02 22:20:01 -070011554 GemmMicrokernelTester()
11555 .mr(1)
11556 .nr(8)
11557 .kr(4)
11558 .sr(1)
11559 .m(m)
11560 .n(n)
11561 .k(k)
11562 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011563 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070011564 }
11565 }
11566 }
11567 }
11568
11569 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_gt_8) {
11570 TEST_REQUIRES_ARM_NEON_DOT;
11571 for (size_t k = 9; k < 16; k++) {
11572 GemmMicrokernelTester()
11573 .mr(1)
11574 .nr(8)
11575 .kr(4)
11576 .sr(1)
11577 .m(1)
11578 .n(8)
11579 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011580 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070011581 }
11582 }
11583
11584 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_gt_8_strided_a) {
11585 TEST_REQUIRES_ARM_NEON_DOT;
11586 for (size_t k = 9; k < 16; k++) {
11587 GemmMicrokernelTester()
11588 .mr(1)
11589 .nr(8)
11590 .kr(4)
11591 .sr(1)
11592 .m(1)
11593 .n(8)
11594 .k(k)
11595 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080011596 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070011597 }
11598 }
11599
11600 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_gt_8_subtile) {
11601 TEST_REQUIRES_ARM_NEON_DOT;
11602 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011603 for (uint32_t n = 1; n <= 8; n++) {
11604 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan18630de2021-06-02 22:20:01 -070011605 GemmMicrokernelTester()
11606 .mr(1)
11607 .nr(8)
11608 .kr(4)
11609 .sr(1)
11610 .m(m)
11611 .n(n)
11612 .k(k)
11613 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011614 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070011615 }
11616 }
11617 }
11618 }
11619
11620 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_div_8) {
11621 TEST_REQUIRES_ARM_NEON_DOT;
11622 for (size_t k = 16; k <= 80; k += 8) {
11623 GemmMicrokernelTester()
11624 .mr(1)
11625 .nr(8)
11626 .kr(4)
11627 .sr(1)
11628 .m(1)
11629 .n(8)
11630 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011631 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070011632 }
11633 }
11634
11635 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_div_8_strided_a) {
11636 TEST_REQUIRES_ARM_NEON_DOT;
11637 for (size_t k = 16; k <= 80; k += 8) {
11638 GemmMicrokernelTester()
11639 .mr(1)
11640 .nr(8)
11641 .kr(4)
11642 .sr(1)
11643 .m(1)
11644 .n(8)
11645 .k(k)
11646 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080011647 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070011648 }
11649 }
11650
11651 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_div_8_subtile) {
11652 TEST_REQUIRES_ARM_NEON_DOT;
11653 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011654 for (uint32_t n = 1; n <= 8; n++) {
11655 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan18630de2021-06-02 22:20:01 -070011656 GemmMicrokernelTester()
11657 .mr(1)
11658 .nr(8)
11659 .kr(4)
11660 .sr(1)
11661 .m(m)
11662 .n(n)
11663 .k(k)
11664 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011665 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070011666 }
11667 }
11668 }
11669 }
11670
11671 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, n_gt_8) {
11672 TEST_REQUIRES_ARM_NEON_DOT;
11673 for (uint32_t n = 9; n < 16; n++) {
11674 for (size_t k = 1; k <= 40; k += 9) {
11675 GemmMicrokernelTester()
11676 .mr(1)
11677 .nr(8)
11678 .kr(4)
11679 .sr(1)
11680 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011681 .n(n)
Marat Dukhan18630de2021-06-02 22:20:01 -070011682 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011683 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070011684 }
11685 }
11686 }
11687
11688 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, n_gt_8_strided_cn) {
11689 TEST_REQUIRES_ARM_NEON_DOT;
11690 for (uint32_t n = 9; n < 16; n++) {
11691 for (size_t k = 1; k <= 40; k += 9) {
11692 GemmMicrokernelTester()
11693 .mr(1)
11694 .nr(8)
11695 .kr(4)
11696 .sr(1)
11697 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011698 .n(n)
Marat Dukhan18630de2021-06-02 22:20:01 -070011699 .k(k)
11700 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011701 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070011702 }
11703 }
11704 }
11705
11706 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, n_gt_8_strided_a) {
11707 TEST_REQUIRES_ARM_NEON_DOT;
11708 for (uint32_t n = 9; n < 16; n++) {
11709 for (size_t k = 1; k <= 40; k += 9) {
11710 GemmMicrokernelTester()
11711 .mr(1)
11712 .nr(8)
11713 .kr(4)
11714 .sr(1)
11715 .m(1)
11716 .n(n)
11717 .k(k)
11718 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080011719 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070011720 }
11721 }
11722 }
11723
11724 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, n_gt_8_subtile) {
11725 TEST_REQUIRES_ARM_NEON_DOT;
11726 for (uint32_t n = 9; n < 16; n++) {
11727 for (size_t k = 1; k <= 40; k += 9) {
11728 for (uint32_t m = 1; m <= 1; m++) {
11729 GemmMicrokernelTester()
11730 .mr(1)
11731 .nr(8)
11732 .kr(4)
11733 .sr(1)
11734 .m(m)
11735 .n(n)
11736 .k(k)
11737 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011738 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070011739 }
11740 }
11741 }
11742 }
11743
11744 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, n_div_8) {
11745 TEST_REQUIRES_ARM_NEON_DOT;
11746 for (uint32_t n = 16; n <= 24; n += 8) {
11747 for (size_t k = 1; k <= 40; k += 9) {
11748 GemmMicrokernelTester()
11749 .mr(1)
11750 .nr(8)
11751 .kr(4)
11752 .sr(1)
11753 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011754 .n(n)
Marat Dukhan18630de2021-06-02 22:20:01 -070011755 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011756 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070011757 }
11758 }
11759 }
11760
11761 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, n_div_8_strided_cn) {
11762 TEST_REQUIRES_ARM_NEON_DOT;
11763 for (uint32_t n = 16; n <= 24; n += 8) {
11764 for (size_t k = 1; k <= 40; k += 9) {
11765 GemmMicrokernelTester()
11766 .mr(1)
11767 .nr(8)
11768 .kr(4)
11769 .sr(1)
11770 .m(1)
11771 .n(n)
11772 .k(k)
11773 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011774 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070011775 }
11776 }
11777 }
11778
11779 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, n_div_8_strided_a) {
11780 TEST_REQUIRES_ARM_NEON_DOT;
11781 for (uint32_t n = 16; n <= 24; n += 8) {
11782 for (size_t k = 1; k <= 40; k += 9) {
11783 GemmMicrokernelTester()
11784 .mr(1)
11785 .nr(8)
11786 .kr(4)
11787 .sr(1)
11788 .m(1)
11789 .n(n)
11790 .k(k)
11791 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080011792 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070011793 }
11794 }
11795 }
11796
11797 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, n_div_8_subtile) {
11798 TEST_REQUIRES_ARM_NEON_DOT;
11799 for (uint32_t n = 16; n <= 24; n += 8) {
11800 for (size_t k = 1; k <= 40; k += 9) {
11801 for (uint32_t m = 1; m <= 1; m++) {
11802 GemmMicrokernelTester()
11803 .mr(1)
11804 .nr(8)
11805 .kr(4)
11806 .sr(1)
11807 .m(m)
11808 .n(n)
11809 .k(k)
11810 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011811 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070011812 }
11813 }
11814 }
11815 }
11816
11817 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, strided_cm_subtile) {
11818 TEST_REQUIRES_ARM_NEON_DOT;
11819 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011820 for (uint32_t n = 1; n <= 8; n++) {
11821 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan18630de2021-06-02 22:20:01 -070011822 GemmMicrokernelTester()
11823 .mr(1)
11824 .nr(8)
11825 .kr(4)
11826 .sr(1)
11827 .m(m)
11828 .n(n)
11829 .k(k)
11830 .cm_stride(11)
11831 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011832 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070011833 }
11834 }
11835 }
11836 }
11837
11838 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, qmin) {
11839 TEST_REQUIRES_ARM_NEON_DOT;
11840 GemmMicrokernelTester()
11841 .mr(1)
11842 .nr(8)
11843 .kr(4)
11844 .sr(1)
11845 .m(1)
11846 .n(8)
11847 .k(8)
11848 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011849 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070011850 }
11851
11852 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, qmax) {
11853 TEST_REQUIRES_ARM_NEON_DOT;
11854 GemmMicrokernelTester()
11855 .mr(1)
11856 .nr(8)
11857 .kr(4)
11858 .sr(1)
11859 .m(1)
11860 .n(8)
11861 .k(8)
11862 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011863 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070011864 }
11865
11866 TEST(QS8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, strided_cm) {
11867 TEST_REQUIRES_ARM_NEON_DOT;
11868 GemmMicrokernelTester()
11869 .mr(1)
11870 .nr(8)
11871 .kr(4)
11872 .sr(1)
11873 .m(1)
11874 .n(8)
11875 .k(8)
11876 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011877 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
Marat Dukhan18630de2021-06-02 22:20:01 -070011878 }
11879#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
11880
11881
Marat Dukhan9b474cf2021-05-25 16:37:48 -070011882#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanc46e6712021-06-01 19:00:16 -070011883 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8) {
11884 TEST_REQUIRES_X86_SSE2;
11885 GemmMicrokernelTester()
11886 .mr(1)
11887 .nr(4)
11888 .kr(2)
11889 .sr(1)
11890 .m(1)
11891 .n(4)
11892 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080011893 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070011894 }
11895
11896 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, strided_cn) {
11897 TEST_REQUIRES_X86_SSE2;
11898 GemmMicrokernelTester()
11899 .mr(1)
11900 .nr(4)
11901 .kr(2)
11902 .sr(1)
11903 .m(1)
11904 .n(4)
11905 .k(8)
11906 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080011907 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070011908 }
11909
11910 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8_strided_a) {
11911 TEST_REQUIRES_X86_SSE2;
11912 GemmMicrokernelTester()
11913 .mr(1)
11914 .nr(4)
11915 .kr(2)
11916 .sr(1)
11917 .m(1)
11918 .n(4)
11919 .k(8)
11920 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011921 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070011922 }
11923
11924 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8_subtile) {
11925 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011926 for (uint32_t n = 1; n <= 4; n++) {
11927 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070011928 GemmMicrokernelTester()
11929 .mr(1)
11930 .nr(4)
11931 .kr(2)
11932 .sr(1)
11933 .m(m)
11934 .n(n)
11935 .k(8)
11936 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011937 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070011938 }
11939 }
11940 }
11941
11942 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8_subtile_m) {
11943 TEST_REQUIRES_X86_SSE2;
11944 for (uint32_t m = 1; m <= 1; m++) {
11945 GemmMicrokernelTester()
11946 .mr(1)
11947 .nr(4)
11948 .kr(2)
11949 .sr(1)
11950 .m(m)
11951 .n(4)
11952 .k(8)
11953 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011954 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070011955 }
11956 }
11957
11958 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8_subtile_n) {
11959 TEST_REQUIRES_X86_SSE2;
11960 for (uint32_t n = 1; n <= 4; n++) {
11961 GemmMicrokernelTester()
11962 .mr(1)
11963 .nr(4)
11964 .kr(2)
11965 .sr(1)
11966 .m(1)
11967 .n(n)
11968 .k(8)
11969 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011970 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070011971 }
11972 }
11973
11974 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_lt_8) {
11975 TEST_REQUIRES_X86_SSE2;
11976 for (size_t k = 1; k < 8; k++) {
11977 GemmMicrokernelTester()
11978 .mr(1)
11979 .nr(4)
11980 .kr(2)
11981 .sr(1)
11982 .m(1)
11983 .n(4)
11984 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011985 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070011986 }
11987 }
11988
11989 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_lt_8_strided_a) {
11990 TEST_REQUIRES_X86_SSE2;
11991 for (size_t k = 1; k < 8; k++) {
11992 GemmMicrokernelTester()
11993 .mr(1)
11994 .nr(4)
11995 .kr(2)
11996 .sr(1)
11997 .m(1)
11998 .n(4)
11999 .k(k)
12000 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080012001 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012002 }
12003 }
12004
12005 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_lt_8_subtile) {
12006 TEST_REQUIRES_X86_SSE2;
12007 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012008 for (uint32_t n = 1; n <= 4; n++) {
12009 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070012010 GemmMicrokernelTester()
12011 .mr(1)
12012 .nr(4)
12013 .kr(2)
12014 .sr(1)
12015 .m(m)
12016 .n(n)
12017 .k(k)
12018 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012019 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012020 }
12021 }
12022 }
12023 }
12024
12025 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_gt_8) {
12026 TEST_REQUIRES_X86_SSE2;
12027 for (size_t k = 9; k < 16; k++) {
12028 GemmMicrokernelTester()
12029 .mr(1)
12030 .nr(4)
12031 .kr(2)
12032 .sr(1)
12033 .m(1)
12034 .n(4)
12035 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012036 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012037 }
12038 }
12039
12040 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_gt_8_strided_a) {
12041 TEST_REQUIRES_X86_SSE2;
12042 for (size_t k = 9; k < 16; k++) {
12043 GemmMicrokernelTester()
12044 .mr(1)
12045 .nr(4)
12046 .kr(2)
12047 .sr(1)
12048 .m(1)
12049 .n(4)
12050 .k(k)
12051 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080012052 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012053 }
12054 }
12055
12056 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_gt_8_subtile) {
12057 TEST_REQUIRES_X86_SSE2;
12058 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012059 for (uint32_t n = 1; n <= 4; n++) {
12060 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070012061 GemmMicrokernelTester()
12062 .mr(1)
12063 .nr(4)
12064 .kr(2)
12065 .sr(1)
12066 .m(m)
12067 .n(n)
12068 .k(k)
12069 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012070 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012071 }
12072 }
12073 }
12074 }
12075
12076 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_div_8) {
12077 TEST_REQUIRES_X86_SSE2;
12078 for (size_t k = 16; k <= 80; k += 8) {
12079 GemmMicrokernelTester()
12080 .mr(1)
12081 .nr(4)
12082 .kr(2)
12083 .sr(1)
12084 .m(1)
12085 .n(4)
12086 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012087 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012088 }
12089 }
12090
12091 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_div_8_strided_a) {
12092 TEST_REQUIRES_X86_SSE2;
12093 for (size_t k = 16; k <= 80; k += 8) {
12094 GemmMicrokernelTester()
12095 .mr(1)
12096 .nr(4)
12097 .kr(2)
12098 .sr(1)
12099 .m(1)
12100 .n(4)
12101 .k(k)
12102 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080012103 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012104 }
12105 }
12106
12107 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_div_8_subtile) {
12108 TEST_REQUIRES_X86_SSE2;
12109 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012110 for (uint32_t n = 1; n <= 4; n++) {
12111 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070012112 GemmMicrokernelTester()
12113 .mr(1)
12114 .nr(4)
12115 .kr(2)
12116 .sr(1)
12117 .m(m)
12118 .n(n)
12119 .k(k)
12120 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012121 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012122 }
12123 }
12124 }
12125 }
12126
12127 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4) {
12128 TEST_REQUIRES_X86_SSE2;
12129 for (uint32_t n = 5; n < 8; n++) {
12130 for (size_t k = 1; k <= 40; k += 9) {
12131 GemmMicrokernelTester()
12132 .mr(1)
12133 .nr(4)
12134 .kr(2)
12135 .sr(1)
12136 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012137 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070012138 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012139 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012140 }
12141 }
12142 }
12143
12144 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4_strided_cn) {
12145 TEST_REQUIRES_X86_SSE2;
12146 for (uint32_t n = 5; n < 8; n++) {
12147 for (size_t k = 1; k <= 40; k += 9) {
12148 GemmMicrokernelTester()
12149 .mr(1)
12150 .nr(4)
12151 .kr(2)
12152 .sr(1)
12153 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012154 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070012155 .k(k)
12156 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080012157 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012158 }
12159 }
12160 }
12161
12162 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4_strided_a) {
12163 TEST_REQUIRES_X86_SSE2;
12164 for (uint32_t n = 5; n < 8; n++) {
12165 for (size_t k = 1; k <= 40; k += 9) {
12166 GemmMicrokernelTester()
12167 .mr(1)
12168 .nr(4)
12169 .kr(2)
12170 .sr(1)
12171 .m(1)
12172 .n(n)
12173 .k(k)
12174 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080012175 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012176 }
12177 }
12178 }
12179
12180 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4_subtile) {
12181 TEST_REQUIRES_X86_SSE2;
12182 for (uint32_t n = 5; n < 8; n++) {
12183 for (size_t k = 1; k <= 40; k += 9) {
12184 for (uint32_t m = 1; m <= 1; m++) {
12185 GemmMicrokernelTester()
12186 .mr(1)
12187 .nr(4)
12188 .kr(2)
12189 .sr(1)
12190 .m(m)
12191 .n(n)
12192 .k(k)
12193 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012194 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012195 }
12196 }
12197 }
12198 }
12199
12200 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4) {
12201 TEST_REQUIRES_X86_SSE2;
12202 for (uint32_t n = 8; n <= 12; n += 4) {
12203 for (size_t k = 1; k <= 40; k += 9) {
12204 GemmMicrokernelTester()
12205 .mr(1)
12206 .nr(4)
12207 .kr(2)
12208 .sr(1)
12209 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012210 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070012211 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012212 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012213 }
12214 }
12215 }
12216
12217 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4_strided_cn) {
12218 TEST_REQUIRES_X86_SSE2;
12219 for (uint32_t n = 8; n <= 12; n += 4) {
12220 for (size_t k = 1; k <= 40; k += 9) {
12221 GemmMicrokernelTester()
12222 .mr(1)
12223 .nr(4)
12224 .kr(2)
12225 .sr(1)
12226 .m(1)
12227 .n(n)
12228 .k(k)
12229 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080012230 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012231 }
12232 }
12233 }
12234
12235 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4_strided_a) {
12236 TEST_REQUIRES_X86_SSE2;
12237 for (uint32_t n = 8; n <= 12; n += 4) {
12238 for (size_t k = 1; k <= 40; k += 9) {
12239 GemmMicrokernelTester()
12240 .mr(1)
12241 .nr(4)
12242 .kr(2)
12243 .sr(1)
12244 .m(1)
12245 .n(n)
12246 .k(k)
12247 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080012248 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012249 }
12250 }
12251 }
12252
12253 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4_subtile) {
12254 TEST_REQUIRES_X86_SSE2;
12255 for (uint32_t n = 8; n <= 12; n += 4) {
12256 for (size_t k = 1; k <= 40; k += 9) {
12257 for (uint32_t m = 1; m <= 1; m++) {
12258 GemmMicrokernelTester()
12259 .mr(1)
12260 .nr(4)
12261 .kr(2)
12262 .sr(1)
12263 .m(m)
12264 .n(n)
12265 .k(k)
12266 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012267 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012268 }
12269 }
12270 }
12271 }
12272
12273 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, strided_cm_subtile) {
12274 TEST_REQUIRES_X86_SSE2;
12275 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012276 for (uint32_t n = 1; n <= 4; n++) {
12277 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070012278 GemmMicrokernelTester()
12279 .mr(1)
12280 .nr(4)
12281 .kr(2)
12282 .sr(1)
12283 .m(m)
12284 .n(n)
12285 .k(k)
12286 .cm_stride(7)
12287 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012288 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012289 }
12290 }
12291 }
12292 }
12293
12294 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, qmin) {
12295 TEST_REQUIRES_X86_SSE2;
12296 GemmMicrokernelTester()
12297 .mr(1)
12298 .nr(4)
12299 .kr(2)
12300 .sr(1)
12301 .m(1)
12302 .n(4)
12303 .k(8)
12304 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012305 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012306 }
12307
12308 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, qmax) {
12309 TEST_REQUIRES_X86_SSE2;
12310 GemmMicrokernelTester()
12311 .mr(1)
12312 .nr(4)
12313 .kr(2)
12314 .sr(1)
12315 .m(1)
12316 .n(4)
12317 .k(8)
12318 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012319 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012320 }
12321
12322 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD64, strided_cm) {
12323 TEST_REQUIRES_X86_SSE2;
12324 GemmMicrokernelTester()
12325 .mr(1)
12326 .nr(4)
12327 .kr(2)
12328 .sr(1)
12329 .m(1)
12330 .n(4)
12331 .k(8)
12332 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080012333 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012334 }
12335#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12336
12337
12338#if XNN_ARCH_X86 || XNN_ARCH_X86_64
12339 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8) {
12340 TEST_REQUIRES_X86_SSE2;
12341 GemmMicrokernelTester()
12342 .mr(2)
12343 .nr(4)
12344 .kr(2)
12345 .sr(1)
12346 .m(2)
12347 .n(4)
12348 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080012349 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012350 }
12351
12352 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, strided_cn) {
12353 TEST_REQUIRES_X86_SSE2;
12354 GemmMicrokernelTester()
12355 .mr(2)
12356 .nr(4)
12357 .kr(2)
12358 .sr(1)
12359 .m(2)
12360 .n(4)
12361 .k(8)
12362 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080012363 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012364 }
12365
12366 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8_strided_a) {
12367 TEST_REQUIRES_X86_SSE2;
12368 GemmMicrokernelTester()
12369 .mr(2)
12370 .nr(4)
12371 .kr(2)
12372 .sr(1)
12373 .m(2)
12374 .n(4)
12375 .k(8)
12376 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080012377 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012378 }
12379
12380 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8_subtile) {
12381 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012382 for (uint32_t n = 1; n <= 4; n++) {
12383 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070012384 GemmMicrokernelTester()
12385 .mr(2)
12386 .nr(4)
12387 .kr(2)
12388 .sr(1)
12389 .m(m)
12390 .n(n)
12391 .k(8)
12392 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012393 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012394 }
12395 }
12396 }
12397
12398 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8_subtile_m) {
12399 TEST_REQUIRES_X86_SSE2;
12400 for (uint32_t m = 1; m <= 2; m++) {
12401 GemmMicrokernelTester()
12402 .mr(2)
12403 .nr(4)
12404 .kr(2)
12405 .sr(1)
12406 .m(m)
12407 .n(4)
12408 .k(8)
12409 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012410 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012411 }
12412 }
12413
12414 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8_subtile_n) {
12415 TEST_REQUIRES_X86_SSE2;
12416 for (uint32_t n = 1; n <= 4; n++) {
12417 GemmMicrokernelTester()
12418 .mr(2)
12419 .nr(4)
12420 .kr(2)
12421 .sr(1)
12422 .m(2)
12423 .n(n)
12424 .k(8)
12425 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012426 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012427 }
12428 }
12429
12430 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_lt_8) {
12431 TEST_REQUIRES_X86_SSE2;
12432 for (size_t k = 1; k < 8; k++) {
12433 GemmMicrokernelTester()
12434 .mr(2)
12435 .nr(4)
12436 .kr(2)
12437 .sr(1)
12438 .m(2)
12439 .n(4)
12440 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012441 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012442 }
12443 }
12444
12445 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_lt_8_strided_a) {
12446 TEST_REQUIRES_X86_SSE2;
12447 for (size_t k = 1; k < 8; k++) {
12448 GemmMicrokernelTester()
12449 .mr(2)
12450 .nr(4)
12451 .kr(2)
12452 .sr(1)
12453 .m(2)
12454 .n(4)
12455 .k(k)
12456 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080012457 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012458 }
12459 }
12460
12461 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_lt_8_subtile) {
12462 TEST_REQUIRES_X86_SSE2;
12463 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012464 for (uint32_t n = 1; n <= 4; n++) {
12465 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070012466 GemmMicrokernelTester()
12467 .mr(2)
12468 .nr(4)
12469 .kr(2)
12470 .sr(1)
12471 .m(m)
12472 .n(n)
12473 .k(k)
12474 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012475 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012476 }
12477 }
12478 }
12479 }
12480
12481 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_gt_8) {
12482 TEST_REQUIRES_X86_SSE2;
12483 for (size_t k = 9; k < 16; k++) {
12484 GemmMicrokernelTester()
12485 .mr(2)
12486 .nr(4)
12487 .kr(2)
12488 .sr(1)
12489 .m(2)
12490 .n(4)
12491 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012492 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012493 }
12494 }
12495
12496 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_gt_8_strided_a) {
12497 TEST_REQUIRES_X86_SSE2;
12498 for (size_t k = 9; k < 16; k++) {
12499 GemmMicrokernelTester()
12500 .mr(2)
12501 .nr(4)
12502 .kr(2)
12503 .sr(1)
12504 .m(2)
12505 .n(4)
12506 .k(k)
12507 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080012508 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012509 }
12510 }
12511
12512 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_gt_8_subtile) {
12513 TEST_REQUIRES_X86_SSE2;
12514 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012515 for (uint32_t n = 1; n <= 4; n++) {
12516 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070012517 GemmMicrokernelTester()
12518 .mr(2)
12519 .nr(4)
12520 .kr(2)
12521 .sr(1)
12522 .m(m)
12523 .n(n)
12524 .k(k)
12525 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012526 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012527 }
12528 }
12529 }
12530 }
12531
12532 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_div_8) {
12533 TEST_REQUIRES_X86_SSE2;
12534 for (size_t k = 16; k <= 80; k += 8) {
12535 GemmMicrokernelTester()
12536 .mr(2)
12537 .nr(4)
12538 .kr(2)
12539 .sr(1)
12540 .m(2)
12541 .n(4)
12542 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012543 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012544 }
12545 }
12546
12547 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_div_8_strided_a) {
12548 TEST_REQUIRES_X86_SSE2;
12549 for (size_t k = 16; k <= 80; k += 8) {
12550 GemmMicrokernelTester()
12551 .mr(2)
12552 .nr(4)
12553 .kr(2)
12554 .sr(1)
12555 .m(2)
12556 .n(4)
12557 .k(k)
12558 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080012559 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012560 }
12561 }
12562
12563 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_div_8_subtile) {
12564 TEST_REQUIRES_X86_SSE2;
12565 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012566 for (uint32_t n = 1; n <= 4; n++) {
12567 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070012568 GemmMicrokernelTester()
12569 .mr(2)
12570 .nr(4)
12571 .kr(2)
12572 .sr(1)
12573 .m(m)
12574 .n(n)
12575 .k(k)
12576 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012577 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012578 }
12579 }
12580 }
12581 }
12582
12583 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4) {
12584 TEST_REQUIRES_X86_SSE2;
12585 for (uint32_t n = 5; n < 8; n++) {
12586 for (size_t k = 1; k <= 40; k += 9) {
12587 GemmMicrokernelTester()
12588 .mr(2)
12589 .nr(4)
12590 .kr(2)
12591 .sr(1)
12592 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012593 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070012594 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012595 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012596 }
12597 }
12598 }
12599
12600 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4_strided_cn) {
12601 TEST_REQUIRES_X86_SSE2;
12602 for (uint32_t n = 5; n < 8; n++) {
12603 for (size_t k = 1; k <= 40; k += 9) {
12604 GemmMicrokernelTester()
12605 .mr(2)
12606 .nr(4)
12607 .kr(2)
12608 .sr(1)
12609 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012610 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070012611 .k(k)
12612 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080012613 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012614 }
12615 }
12616 }
12617
12618 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4_strided_a) {
12619 TEST_REQUIRES_X86_SSE2;
12620 for (uint32_t n = 5; n < 8; n++) {
12621 for (size_t k = 1; k <= 40; k += 9) {
12622 GemmMicrokernelTester()
12623 .mr(2)
12624 .nr(4)
12625 .kr(2)
12626 .sr(1)
12627 .m(2)
12628 .n(n)
12629 .k(k)
12630 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080012631 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012632 }
12633 }
12634 }
12635
12636 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4_subtile) {
12637 TEST_REQUIRES_X86_SSE2;
12638 for (uint32_t n = 5; n < 8; n++) {
12639 for (size_t k = 1; k <= 40; k += 9) {
12640 for (uint32_t m = 1; m <= 2; m++) {
12641 GemmMicrokernelTester()
12642 .mr(2)
12643 .nr(4)
12644 .kr(2)
12645 .sr(1)
12646 .m(m)
12647 .n(n)
12648 .k(k)
12649 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012650 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012651 }
12652 }
12653 }
12654 }
12655
12656 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4) {
12657 TEST_REQUIRES_X86_SSE2;
12658 for (uint32_t n = 8; n <= 12; n += 4) {
12659 for (size_t k = 1; k <= 40; k += 9) {
12660 GemmMicrokernelTester()
12661 .mr(2)
12662 .nr(4)
12663 .kr(2)
12664 .sr(1)
12665 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012666 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070012667 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012668 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012669 }
12670 }
12671 }
12672
12673 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4_strided_cn) {
12674 TEST_REQUIRES_X86_SSE2;
12675 for (uint32_t n = 8; n <= 12; n += 4) {
12676 for (size_t k = 1; k <= 40; k += 9) {
12677 GemmMicrokernelTester()
12678 .mr(2)
12679 .nr(4)
12680 .kr(2)
12681 .sr(1)
12682 .m(2)
12683 .n(n)
12684 .k(k)
12685 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080012686 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012687 }
12688 }
12689 }
12690
12691 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4_strided_a) {
12692 TEST_REQUIRES_X86_SSE2;
12693 for (uint32_t n = 8; n <= 12; n += 4) {
12694 for (size_t k = 1; k <= 40; k += 9) {
12695 GemmMicrokernelTester()
12696 .mr(2)
12697 .nr(4)
12698 .kr(2)
12699 .sr(1)
12700 .m(2)
12701 .n(n)
12702 .k(k)
12703 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080012704 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012705 }
12706 }
12707 }
12708
12709 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4_subtile) {
12710 TEST_REQUIRES_X86_SSE2;
12711 for (uint32_t n = 8; n <= 12; n += 4) {
12712 for (size_t k = 1; k <= 40; k += 9) {
12713 for (uint32_t m = 1; m <= 2; m++) {
12714 GemmMicrokernelTester()
12715 .mr(2)
12716 .nr(4)
12717 .kr(2)
12718 .sr(1)
12719 .m(m)
12720 .n(n)
12721 .k(k)
12722 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012723 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012724 }
12725 }
12726 }
12727 }
12728
12729 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, strided_cm_subtile) {
12730 TEST_REQUIRES_X86_SSE2;
12731 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012732 for (uint32_t n = 1; n <= 4; n++) {
12733 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070012734 GemmMicrokernelTester()
12735 .mr(2)
12736 .nr(4)
12737 .kr(2)
12738 .sr(1)
12739 .m(m)
12740 .n(n)
12741 .k(k)
12742 .cm_stride(7)
12743 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012744 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012745 }
12746 }
12747 }
12748 }
12749
12750 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, qmin) {
12751 TEST_REQUIRES_X86_SSE2;
12752 GemmMicrokernelTester()
12753 .mr(2)
12754 .nr(4)
12755 .kr(2)
12756 .sr(1)
12757 .m(2)
12758 .n(4)
12759 .k(8)
12760 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012761 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012762 }
12763
12764 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, qmax) {
12765 TEST_REQUIRES_X86_SSE2;
12766 GemmMicrokernelTester()
12767 .mr(2)
12768 .nr(4)
12769 .kr(2)
12770 .sr(1)
12771 .m(2)
12772 .n(4)
12773 .k(8)
12774 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012775 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012776 }
12777
12778 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, strided_cm) {
12779 TEST_REQUIRES_X86_SSE2;
12780 GemmMicrokernelTester()
12781 .mr(2)
12782 .nr(4)
12783 .kr(2)
12784 .sr(1)
12785 .m(2)
12786 .n(4)
12787 .k(8)
12788 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080012789 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012790 }
12791#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12792
12793
12794#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanc46e6712021-06-01 19:00:16 -070012795 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8) {
12796 TEST_REQUIRES_X86_SSE2;
12797 GemmMicrokernelTester()
12798 .mr(4)
12799 .nr(4)
12800 .kr(2)
12801 .sr(1)
12802 .m(4)
12803 .n(4)
12804 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080012805 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012806 }
12807
12808 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, strided_cn) {
12809 TEST_REQUIRES_X86_SSE2;
12810 GemmMicrokernelTester()
12811 .mr(4)
12812 .nr(4)
12813 .kr(2)
12814 .sr(1)
12815 .m(4)
12816 .n(4)
12817 .k(8)
12818 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080012819 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012820 }
12821
12822 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8_strided_a) {
12823 TEST_REQUIRES_X86_SSE2;
12824 GemmMicrokernelTester()
12825 .mr(4)
12826 .nr(4)
12827 .kr(2)
12828 .sr(1)
12829 .m(4)
12830 .n(4)
12831 .k(8)
12832 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080012833 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012834 }
12835
12836 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8_subtile) {
12837 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012838 for (uint32_t n = 1; n <= 4; n++) {
12839 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070012840 GemmMicrokernelTester()
12841 .mr(4)
12842 .nr(4)
12843 .kr(2)
12844 .sr(1)
12845 .m(m)
12846 .n(n)
12847 .k(8)
12848 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012849 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012850 }
12851 }
12852 }
12853
12854 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8_subtile_m) {
12855 TEST_REQUIRES_X86_SSE2;
12856 for (uint32_t m = 1; m <= 4; m++) {
12857 GemmMicrokernelTester()
12858 .mr(4)
12859 .nr(4)
12860 .kr(2)
12861 .sr(1)
12862 .m(m)
12863 .n(4)
12864 .k(8)
12865 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012866 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012867 }
12868 }
12869
12870 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8_subtile_n) {
12871 TEST_REQUIRES_X86_SSE2;
12872 for (uint32_t n = 1; n <= 4; n++) {
12873 GemmMicrokernelTester()
12874 .mr(4)
12875 .nr(4)
12876 .kr(2)
12877 .sr(1)
12878 .m(4)
12879 .n(n)
12880 .k(8)
12881 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012882 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012883 }
12884 }
12885
12886 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_lt_8) {
12887 TEST_REQUIRES_X86_SSE2;
12888 for (size_t k = 1; k < 8; k++) {
12889 GemmMicrokernelTester()
12890 .mr(4)
12891 .nr(4)
12892 .kr(2)
12893 .sr(1)
12894 .m(4)
12895 .n(4)
12896 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012897 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012898 }
12899 }
12900
12901 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_lt_8_strided_a) {
12902 TEST_REQUIRES_X86_SSE2;
12903 for (size_t k = 1; k < 8; k++) {
12904 GemmMicrokernelTester()
12905 .mr(4)
12906 .nr(4)
12907 .kr(2)
12908 .sr(1)
12909 .m(4)
12910 .n(4)
12911 .k(k)
12912 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080012913 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012914 }
12915 }
12916
12917 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_lt_8_subtile) {
12918 TEST_REQUIRES_X86_SSE2;
12919 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012920 for (uint32_t n = 1; n <= 4; n++) {
12921 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070012922 GemmMicrokernelTester()
12923 .mr(4)
12924 .nr(4)
12925 .kr(2)
12926 .sr(1)
12927 .m(m)
12928 .n(n)
12929 .k(k)
12930 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012931 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012932 }
12933 }
12934 }
12935 }
12936
12937 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_gt_8) {
12938 TEST_REQUIRES_X86_SSE2;
12939 for (size_t k = 9; k < 16; k++) {
12940 GemmMicrokernelTester()
12941 .mr(4)
12942 .nr(4)
12943 .kr(2)
12944 .sr(1)
12945 .m(4)
12946 .n(4)
12947 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012948 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012949 }
12950 }
12951
12952 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_gt_8_strided_a) {
12953 TEST_REQUIRES_X86_SSE2;
12954 for (size_t k = 9; k < 16; k++) {
12955 GemmMicrokernelTester()
12956 .mr(4)
12957 .nr(4)
12958 .kr(2)
12959 .sr(1)
12960 .m(4)
12961 .n(4)
12962 .k(k)
12963 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080012964 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012965 }
12966 }
12967
12968 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_gt_8_subtile) {
12969 TEST_REQUIRES_X86_SSE2;
12970 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012971 for (uint32_t n = 1; n <= 4; n++) {
12972 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070012973 GemmMicrokernelTester()
12974 .mr(4)
12975 .nr(4)
12976 .kr(2)
12977 .sr(1)
12978 .m(m)
12979 .n(n)
12980 .k(k)
12981 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012982 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070012983 }
12984 }
12985 }
12986 }
12987
12988 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_div_8) {
12989 TEST_REQUIRES_X86_SSE2;
12990 for (size_t k = 16; k <= 80; k += 8) {
12991 GemmMicrokernelTester()
12992 .mr(4)
12993 .nr(4)
12994 .kr(2)
12995 .sr(1)
12996 .m(4)
12997 .n(4)
12998 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012999 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013000 }
13001 }
13002
13003 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_div_8_strided_a) {
13004 TEST_REQUIRES_X86_SSE2;
13005 for (size_t k = 16; k <= 80; k += 8) {
13006 GemmMicrokernelTester()
13007 .mr(4)
13008 .nr(4)
13009 .kr(2)
13010 .sr(1)
13011 .m(4)
13012 .n(4)
13013 .k(k)
13014 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080013015 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013016 }
13017 }
13018
13019 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_div_8_subtile) {
13020 TEST_REQUIRES_X86_SSE2;
13021 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013022 for (uint32_t n = 1; n <= 4; n++) {
13023 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070013024 GemmMicrokernelTester()
13025 .mr(4)
13026 .nr(4)
13027 .kr(2)
13028 .sr(1)
13029 .m(m)
13030 .n(n)
13031 .k(k)
13032 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013033 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013034 }
13035 }
13036 }
13037 }
13038
13039 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4) {
13040 TEST_REQUIRES_X86_SSE2;
13041 for (uint32_t n = 5; n < 8; n++) {
13042 for (size_t k = 1; k <= 40; k += 9) {
13043 GemmMicrokernelTester()
13044 .mr(4)
13045 .nr(4)
13046 .kr(2)
13047 .sr(1)
13048 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013049 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070013050 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013051 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013052 }
13053 }
13054 }
13055
13056 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4_strided_cn) {
13057 TEST_REQUIRES_X86_SSE2;
13058 for (uint32_t n = 5; n < 8; n++) {
13059 for (size_t k = 1; k <= 40; k += 9) {
13060 GemmMicrokernelTester()
13061 .mr(4)
13062 .nr(4)
13063 .kr(2)
13064 .sr(1)
13065 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013066 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070013067 .k(k)
13068 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080013069 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013070 }
13071 }
13072 }
13073
13074 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4_strided_a) {
13075 TEST_REQUIRES_X86_SSE2;
13076 for (uint32_t n = 5; n < 8; n++) {
13077 for (size_t k = 1; k <= 40; k += 9) {
13078 GemmMicrokernelTester()
13079 .mr(4)
13080 .nr(4)
13081 .kr(2)
13082 .sr(1)
13083 .m(4)
13084 .n(n)
13085 .k(k)
13086 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080013087 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013088 }
13089 }
13090 }
13091
13092 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4_subtile) {
13093 TEST_REQUIRES_X86_SSE2;
13094 for (uint32_t n = 5; n < 8; n++) {
13095 for (size_t k = 1; k <= 40; k += 9) {
13096 for (uint32_t m = 1; m <= 4; m++) {
13097 GemmMicrokernelTester()
13098 .mr(4)
13099 .nr(4)
13100 .kr(2)
13101 .sr(1)
13102 .m(m)
13103 .n(n)
13104 .k(k)
13105 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013106 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013107 }
13108 }
13109 }
13110 }
13111
13112 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4) {
13113 TEST_REQUIRES_X86_SSE2;
13114 for (uint32_t n = 8; n <= 12; n += 4) {
13115 for (size_t k = 1; k <= 40; k += 9) {
13116 GemmMicrokernelTester()
13117 .mr(4)
13118 .nr(4)
13119 .kr(2)
13120 .sr(1)
13121 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013122 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070013123 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013124 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013125 }
13126 }
13127 }
13128
13129 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4_strided_cn) {
13130 TEST_REQUIRES_X86_SSE2;
13131 for (uint32_t n = 8; n <= 12; n += 4) {
13132 for (size_t k = 1; k <= 40; k += 9) {
13133 GemmMicrokernelTester()
13134 .mr(4)
13135 .nr(4)
13136 .kr(2)
13137 .sr(1)
13138 .m(4)
13139 .n(n)
13140 .k(k)
13141 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080013142 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013143 }
13144 }
13145 }
13146
13147 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4_strided_a) {
13148 TEST_REQUIRES_X86_SSE2;
13149 for (uint32_t n = 8; n <= 12; n += 4) {
13150 for (size_t k = 1; k <= 40; k += 9) {
13151 GemmMicrokernelTester()
13152 .mr(4)
13153 .nr(4)
13154 .kr(2)
13155 .sr(1)
13156 .m(4)
13157 .n(n)
13158 .k(k)
13159 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080013160 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013161 }
13162 }
13163 }
13164
13165 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4_subtile) {
13166 TEST_REQUIRES_X86_SSE2;
13167 for (uint32_t n = 8; n <= 12; n += 4) {
13168 for (size_t k = 1; k <= 40; k += 9) {
13169 for (uint32_t m = 1; m <= 4; m++) {
13170 GemmMicrokernelTester()
13171 .mr(4)
13172 .nr(4)
13173 .kr(2)
13174 .sr(1)
13175 .m(m)
13176 .n(n)
13177 .k(k)
13178 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013179 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013180 }
13181 }
13182 }
13183 }
13184
13185 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, strided_cm_subtile) {
13186 TEST_REQUIRES_X86_SSE2;
13187 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013188 for (uint32_t n = 1; n <= 4; n++) {
13189 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070013190 GemmMicrokernelTester()
13191 .mr(4)
13192 .nr(4)
13193 .kr(2)
13194 .sr(1)
13195 .m(m)
13196 .n(n)
13197 .k(k)
13198 .cm_stride(7)
13199 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013200 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013201 }
13202 }
13203 }
13204 }
13205
13206 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, qmin) {
13207 TEST_REQUIRES_X86_SSE2;
13208 GemmMicrokernelTester()
13209 .mr(4)
13210 .nr(4)
13211 .kr(2)
13212 .sr(1)
13213 .m(4)
13214 .n(4)
13215 .k(8)
13216 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013217 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013218 }
13219
13220 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, qmax) {
13221 TEST_REQUIRES_X86_SSE2;
13222 GemmMicrokernelTester()
13223 .mr(4)
13224 .nr(4)
13225 .kr(2)
13226 .sr(1)
13227 .m(4)
13228 .n(4)
13229 .k(8)
13230 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013231 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013232 }
13233
13234 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE2_LD64, strided_cm) {
13235 TEST_REQUIRES_X86_SSE2;
13236 GemmMicrokernelTester()
13237 .mr(4)
13238 .nr(4)
13239 .kr(2)
13240 .sr(1)
13241 .m(4)
13242 .n(4)
13243 .k(8)
13244 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080013245 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013246 }
13247#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13248
13249
13250#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanc46e6712021-06-01 19:00:16 -070013251 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8) {
13252 TEST_REQUIRES_X86_SSE41;
13253 GemmMicrokernelTester()
13254 .mr(1)
13255 .nr(4)
13256 .kr(2)
13257 .sr(1)
13258 .m(1)
13259 .n(4)
13260 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080013261 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013262 }
13263
13264 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, strided_cn) {
13265 TEST_REQUIRES_X86_SSE41;
13266 GemmMicrokernelTester()
13267 .mr(1)
13268 .nr(4)
13269 .kr(2)
13270 .sr(1)
13271 .m(1)
13272 .n(4)
13273 .k(8)
13274 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080013275 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013276 }
13277
13278 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_strided_a) {
13279 TEST_REQUIRES_X86_SSE41;
13280 GemmMicrokernelTester()
13281 .mr(1)
13282 .nr(4)
13283 .kr(2)
13284 .sr(1)
13285 .m(1)
13286 .n(4)
13287 .k(8)
13288 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080013289 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013290 }
13291
13292 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_subtile) {
13293 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013294 for (uint32_t n = 1; n <= 4; n++) {
13295 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070013296 GemmMicrokernelTester()
13297 .mr(1)
13298 .nr(4)
13299 .kr(2)
13300 .sr(1)
13301 .m(m)
13302 .n(n)
13303 .k(8)
13304 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013305 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013306 }
13307 }
13308 }
13309
13310 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_subtile_m) {
13311 TEST_REQUIRES_X86_SSE41;
13312 for (uint32_t m = 1; m <= 1; m++) {
13313 GemmMicrokernelTester()
13314 .mr(1)
13315 .nr(4)
13316 .kr(2)
13317 .sr(1)
13318 .m(m)
13319 .n(4)
13320 .k(8)
13321 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013322 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013323 }
13324 }
13325
13326 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_subtile_n) {
13327 TEST_REQUIRES_X86_SSE41;
13328 for (uint32_t n = 1; n <= 4; n++) {
13329 GemmMicrokernelTester()
13330 .mr(1)
13331 .nr(4)
13332 .kr(2)
13333 .sr(1)
13334 .m(1)
13335 .n(n)
13336 .k(8)
13337 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013338 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013339 }
13340 }
13341
13342 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_lt_8) {
13343 TEST_REQUIRES_X86_SSE41;
13344 for (size_t k = 1; k < 8; k++) {
13345 GemmMicrokernelTester()
13346 .mr(1)
13347 .nr(4)
13348 .kr(2)
13349 .sr(1)
13350 .m(1)
13351 .n(4)
13352 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013353 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013354 }
13355 }
13356
13357 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_lt_8_strided_a) {
13358 TEST_REQUIRES_X86_SSE41;
13359 for (size_t k = 1; k < 8; k++) {
13360 GemmMicrokernelTester()
13361 .mr(1)
13362 .nr(4)
13363 .kr(2)
13364 .sr(1)
13365 .m(1)
13366 .n(4)
13367 .k(k)
13368 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080013369 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013370 }
13371 }
13372
13373 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_lt_8_subtile) {
13374 TEST_REQUIRES_X86_SSE41;
13375 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013376 for (uint32_t n = 1; n <= 4; n++) {
13377 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070013378 GemmMicrokernelTester()
13379 .mr(1)
13380 .nr(4)
13381 .kr(2)
13382 .sr(1)
13383 .m(m)
13384 .n(n)
13385 .k(k)
13386 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013387 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013388 }
13389 }
13390 }
13391 }
13392
13393 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_gt_8) {
13394 TEST_REQUIRES_X86_SSE41;
13395 for (size_t k = 9; k < 16; k++) {
13396 GemmMicrokernelTester()
13397 .mr(1)
13398 .nr(4)
13399 .kr(2)
13400 .sr(1)
13401 .m(1)
13402 .n(4)
13403 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013404 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013405 }
13406 }
13407
13408 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_gt_8_strided_a) {
13409 TEST_REQUIRES_X86_SSE41;
13410 for (size_t k = 9; k < 16; k++) {
13411 GemmMicrokernelTester()
13412 .mr(1)
13413 .nr(4)
13414 .kr(2)
13415 .sr(1)
13416 .m(1)
13417 .n(4)
13418 .k(k)
13419 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080013420 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013421 }
13422 }
13423
13424 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_gt_8_subtile) {
13425 TEST_REQUIRES_X86_SSE41;
13426 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013427 for (uint32_t n = 1; n <= 4; n++) {
13428 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070013429 GemmMicrokernelTester()
13430 .mr(1)
13431 .nr(4)
13432 .kr(2)
13433 .sr(1)
13434 .m(m)
13435 .n(n)
13436 .k(k)
13437 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013438 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013439 }
13440 }
13441 }
13442 }
13443
13444 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_div_8) {
13445 TEST_REQUIRES_X86_SSE41;
13446 for (size_t k = 16; k <= 80; k += 8) {
13447 GemmMicrokernelTester()
13448 .mr(1)
13449 .nr(4)
13450 .kr(2)
13451 .sr(1)
13452 .m(1)
13453 .n(4)
13454 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013455 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013456 }
13457 }
13458
13459 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_div_8_strided_a) {
13460 TEST_REQUIRES_X86_SSE41;
13461 for (size_t k = 16; k <= 80; k += 8) {
13462 GemmMicrokernelTester()
13463 .mr(1)
13464 .nr(4)
13465 .kr(2)
13466 .sr(1)
13467 .m(1)
13468 .n(4)
13469 .k(k)
13470 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080013471 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013472 }
13473 }
13474
13475 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_div_8_subtile) {
13476 TEST_REQUIRES_X86_SSE41;
13477 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013478 for (uint32_t n = 1; n <= 4; n++) {
13479 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070013480 GemmMicrokernelTester()
13481 .mr(1)
13482 .nr(4)
13483 .kr(2)
13484 .sr(1)
13485 .m(m)
13486 .n(n)
13487 .k(k)
13488 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013489 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013490 }
13491 }
13492 }
13493 }
13494
13495 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4) {
13496 TEST_REQUIRES_X86_SSE41;
13497 for (uint32_t n = 5; n < 8; n++) {
13498 for (size_t k = 1; k <= 40; k += 9) {
13499 GemmMicrokernelTester()
13500 .mr(1)
13501 .nr(4)
13502 .kr(2)
13503 .sr(1)
13504 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013505 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070013506 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013507 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013508 }
13509 }
13510 }
13511
13512 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4_strided_cn) {
13513 TEST_REQUIRES_X86_SSE41;
13514 for (uint32_t n = 5; n < 8; n++) {
13515 for (size_t k = 1; k <= 40; k += 9) {
13516 GemmMicrokernelTester()
13517 .mr(1)
13518 .nr(4)
13519 .kr(2)
13520 .sr(1)
13521 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013522 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070013523 .k(k)
13524 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080013525 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013526 }
13527 }
13528 }
13529
13530 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4_strided_a) {
13531 TEST_REQUIRES_X86_SSE41;
13532 for (uint32_t n = 5; n < 8; n++) {
13533 for (size_t k = 1; k <= 40; k += 9) {
13534 GemmMicrokernelTester()
13535 .mr(1)
13536 .nr(4)
13537 .kr(2)
13538 .sr(1)
13539 .m(1)
13540 .n(n)
13541 .k(k)
13542 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080013543 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013544 }
13545 }
13546 }
13547
13548 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4_subtile) {
13549 TEST_REQUIRES_X86_SSE41;
13550 for (uint32_t n = 5; n < 8; n++) {
13551 for (size_t k = 1; k <= 40; k += 9) {
13552 for (uint32_t m = 1; m <= 1; m++) {
13553 GemmMicrokernelTester()
13554 .mr(1)
13555 .nr(4)
13556 .kr(2)
13557 .sr(1)
13558 .m(m)
13559 .n(n)
13560 .k(k)
13561 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013562 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013563 }
13564 }
13565 }
13566 }
13567
13568 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4) {
13569 TEST_REQUIRES_X86_SSE41;
13570 for (uint32_t n = 8; n <= 12; n += 4) {
13571 for (size_t k = 1; k <= 40; k += 9) {
13572 GemmMicrokernelTester()
13573 .mr(1)
13574 .nr(4)
13575 .kr(2)
13576 .sr(1)
13577 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013578 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070013579 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013580 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013581 }
13582 }
13583 }
13584
13585 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4_strided_cn) {
13586 TEST_REQUIRES_X86_SSE41;
13587 for (uint32_t n = 8; n <= 12; n += 4) {
13588 for (size_t k = 1; k <= 40; k += 9) {
13589 GemmMicrokernelTester()
13590 .mr(1)
13591 .nr(4)
13592 .kr(2)
13593 .sr(1)
13594 .m(1)
13595 .n(n)
13596 .k(k)
13597 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080013598 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013599 }
13600 }
13601 }
13602
13603 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4_strided_a) {
13604 TEST_REQUIRES_X86_SSE41;
13605 for (uint32_t n = 8; n <= 12; n += 4) {
13606 for (size_t k = 1; k <= 40; k += 9) {
13607 GemmMicrokernelTester()
13608 .mr(1)
13609 .nr(4)
13610 .kr(2)
13611 .sr(1)
13612 .m(1)
13613 .n(n)
13614 .k(k)
13615 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080013616 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013617 }
13618 }
13619 }
13620
13621 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4_subtile) {
13622 TEST_REQUIRES_X86_SSE41;
13623 for (uint32_t n = 8; n <= 12; n += 4) {
13624 for (size_t k = 1; k <= 40; k += 9) {
13625 for (uint32_t m = 1; m <= 1; m++) {
13626 GemmMicrokernelTester()
13627 .mr(1)
13628 .nr(4)
13629 .kr(2)
13630 .sr(1)
13631 .m(m)
13632 .n(n)
13633 .k(k)
13634 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013635 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013636 }
13637 }
13638 }
13639 }
13640
13641 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, strided_cm_subtile) {
13642 TEST_REQUIRES_X86_SSE41;
13643 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013644 for (uint32_t n = 1; n <= 4; n++) {
13645 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070013646 GemmMicrokernelTester()
13647 .mr(1)
13648 .nr(4)
13649 .kr(2)
13650 .sr(1)
13651 .m(m)
13652 .n(n)
13653 .k(k)
13654 .cm_stride(7)
13655 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013656 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013657 }
13658 }
13659 }
13660 }
13661
13662 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, qmin) {
13663 TEST_REQUIRES_X86_SSE41;
13664 GemmMicrokernelTester()
13665 .mr(1)
13666 .nr(4)
13667 .kr(2)
13668 .sr(1)
13669 .m(1)
13670 .n(4)
13671 .k(8)
13672 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013673 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013674 }
13675
13676 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, qmax) {
13677 TEST_REQUIRES_X86_SSE41;
13678 GemmMicrokernelTester()
13679 .mr(1)
13680 .nr(4)
13681 .kr(2)
13682 .sr(1)
13683 .m(1)
13684 .n(4)
13685 .k(8)
13686 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013687 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013688 }
13689
13690 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, strided_cm) {
13691 TEST_REQUIRES_X86_SSE41;
13692 GemmMicrokernelTester()
13693 .mr(1)
13694 .nr(4)
13695 .kr(2)
13696 .sr(1)
13697 .m(1)
13698 .n(4)
13699 .k(8)
13700 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080013701 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013702 }
13703#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13704
13705
13706#if XNN_ARCH_X86 || XNN_ARCH_X86_64
13707 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8) {
13708 TEST_REQUIRES_X86_SSE41;
13709 GemmMicrokernelTester()
13710 .mr(2)
13711 .nr(4)
13712 .kr(2)
13713 .sr(1)
13714 .m(2)
13715 .n(4)
13716 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080013717 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013718 }
13719
13720 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, strided_cn) {
13721 TEST_REQUIRES_X86_SSE41;
13722 GemmMicrokernelTester()
13723 .mr(2)
13724 .nr(4)
13725 .kr(2)
13726 .sr(1)
13727 .m(2)
13728 .n(4)
13729 .k(8)
13730 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080013731 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013732 }
13733
13734 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8_strided_a) {
13735 TEST_REQUIRES_X86_SSE41;
13736 GemmMicrokernelTester()
13737 .mr(2)
13738 .nr(4)
13739 .kr(2)
13740 .sr(1)
13741 .m(2)
13742 .n(4)
13743 .k(8)
13744 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080013745 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013746 }
13747
13748 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8_subtile) {
13749 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013750 for (uint32_t n = 1; n <= 4; n++) {
13751 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070013752 GemmMicrokernelTester()
13753 .mr(2)
13754 .nr(4)
13755 .kr(2)
13756 .sr(1)
13757 .m(m)
13758 .n(n)
13759 .k(8)
13760 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013761 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013762 }
13763 }
13764 }
13765
13766 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8_subtile_m) {
13767 TEST_REQUIRES_X86_SSE41;
13768 for (uint32_t m = 1; m <= 2; m++) {
13769 GemmMicrokernelTester()
13770 .mr(2)
13771 .nr(4)
13772 .kr(2)
13773 .sr(1)
13774 .m(m)
13775 .n(4)
13776 .k(8)
13777 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013778 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013779 }
13780 }
13781
13782 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8_subtile_n) {
13783 TEST_REQUIRES_X86_SSE41;
13784 for (uint32_t n = 1; n <= 4; n++) {
13785 GemmMicrokernelTester()
13786 .mr(2)
13787 .nr(4)
13788 .kr(2)
13789 .sr(1)
13790 .m(2)
13791 .n(n)
13792 .k(8)
13793 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013794 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013795 }
13796 }
13797
13798 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_lt_8) {
13799 TEST_REQUIRES_X86_SSE41;
13800 for (size_t k = 1; k < 8; k++) {
13801 GemmMicrokernelTester()
13802 .mr(2)
13803 .nr(4)
13804 .kr(2)
13805 .sr(1)
13806 .m(2)
13807 .n(4)
13808 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013809 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013810 }
13811 }
13812
13813 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_lt_8_strided_a) {
13814 TEST_REQUIRES_X86_SSE41;
13815 for (size_t k = 1; k < 8; k++) {
13816 GemmMicrokernelTester()
13817 .mr(2)
13818 .nr(4)
13819 .kr(2)
13820 .sr(1)
13821 .m(2)
13822 .n(4)
13823 .k(k)
13824 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080013825 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013826 }
13827 }
13828
13829 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_lt_8_subtile) {
13830 TEST_REQUIRES_X86_SSE41;
13831 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013832 for (uint32_t n = 1; n <= 4; n++) {
13833 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070013834 GemmMicrokernelTester()
13835 .mr(2)
13836 .nr(4)
13837 .kr(2)
13838 .sr(1)
13839 .m(m)
13840 .n(n)
13841 .k(k)
13842 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013843 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013844 }
13845 }
13846 }
13847 }
13848
13849 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_gt_8) {
13850 TEST_REQUIRES_X86_SSE41;
13851 for (size_t k = 9; k < 16; k++) {
13852 GemmMicrokernelTester()
13853 .mr(2)
13854 .nr(4)
13855 .kr(2)
13856 .sr(1)
13857 .m(2)
13858 .n(4)
13859 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013860 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013861 }
13862 }
13863
13864 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_gt_8_strided_a) {
13865 TEST_REQUIRES_X86_SSE41;
13866 for (size_t k = 9; k < 16; k++) {
13867 GemmMicrokernelTester()
13868 .mr(2)
13869 .nr(4)
13870 .kr(2)
13871 .sr(1)
13872 .m(2)
13873 .n(4)
13874 .k(k)
13875 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080013876 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013877 }
13878 }
13879
13880 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_gt_8_subtile) {
13881 TEST_REQUIRES_X86_SSE41;
13882 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013883 for (uint32_t n = 1; n <= 4; n++) {
13884 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070013885 GemmMicrokernelTester()
13886 .mr(2)
13887 .nr(4)
13888 .kr(2)
13889 .sr(1)
13890 .m(m)
13891 .n(n)
13892 .k(k)
13893 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013894 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013895 }
13896 }
13897 }
13898 }
13899
13900 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_div_8) {
13901 TEST_REQUIRES_X86_SSE41;
13902 for (size_t k = 16; k <= 80; k += 8) {
13903 GemmMicrokernelTester()
13904 .mr(2)
13905 .nr(4)
13906 .kr(2)
13907 .sr(1)
13908 .m(2)
13909 .n(4)
13910 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013911 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013912 }
13913 }
13914
13915 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_div_8_strided_a) {
13916 TEST_REQUIRES_X86_SSE41;
13917 for (size_t k = 16; k <= 80; k += 8) {
13918 GemmMicrokernelTester()
13919 .mr(2)
13920 .nr(4)
13921 .kr(2)
13922 .sr(1)
13923 .m(2)
13924 .n(4)
13925 .k(k)
13926 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080013927 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013928 }
13929 }
13930
13931 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_div_8_subtile) {
13932 TEST_REQUIRES_X86_SSE41;
13933 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013934 for (uint32_t n = 1; n <= 4; n++) {
13935 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070013936 GemmMicrokernelTester()
13937 .mr(2)
13938 .nr(4)
13939 .kr(2)
13940 .sr(1)
13941 .m(m)
13942 .n(n)
13943 .k(k)
13944 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013945 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013946 }
13947 }
13948 }
13949 }
13950
13951 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4) {
13952 TEST_REQUIRES_X86_SSE41;
13953 for (uint32_t n = 5; n < 8; n++) {
13954 for (size_t k = 1; k <= 40; k += 9) {
13955 GemmMicrokernelTester()
13956 .mr(2)
13957 .nr(4)
13958 .kr(2)
13959 .sr(1)
13960 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013961 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070013962 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013963 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013964 }
13965 }
13966 }
13967
13968 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4_strided_cn) {
13969 TEST_REQUIRES_X86_SSE41;
13970 for (uint32_t n = 5; n < 8; n++) {
13971 for (size_t k = 1; k <= 40; k += 9) {
13972 GemmMicrokernelTester()
13973 .mr(2)
13974 .nr(4)
13975 .kr(2)
13976 .sr(1)
13977 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013978 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070013979 .k(k)
13980 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080013981 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070013982 }
13983 }
13984 }
13985
13986 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4_strided_a) {
13987 TEST_REQUIRES_X86_SSE41;
13988 for (uint32_t n = 5; n < 8; n++) {
13989 for (size_t k = 1; k <= 40; k += 9) {
13990 GemmMicrokernelTester()
13991 .mr(2)
13992 .nr(4)
13993 .kr(2)
13994 .sr(1)
13995 .m(2)
13996 .n(n)
13997 .k(k)
13998 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080013999 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014000 }
14001 }
14002 }
14003
14004 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4_subtile) {
14005 TEST_REQUIRES_X86_SSE41;
14006 for (uint32_t n = 5; n < 8; n++) {
14007 for (size_t k = 1; k <= 40; k += 9) {
14008 for (uint32_t m = 1; m <= 2; m++) {
14009 GemmMicrokernelTester()
14010 .mr(2)
14011 .nr(4)
14012 .kr(2)
14013 .sr(1)
14014 .m(m)
14015 .n(n)
14016 .k(k)
14017 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014018 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014019 }
14020 }
14021 }
14022 }
14023
14024 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4) {
14025 TEST_REQUIRES_X86_SSE41;
14026 for (uint32_t n = 8; n <= 12; n += 4) {
14027 for (size_t k = 1; k <= 40; k += 9) {
14028 GemmMicrokernelTester()
14029 .mr(2)
14030 .nr(4)
14031 .kr(2)
14032 .sr(1)
14033 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014034 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070014035 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014036 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014037 }
14038 }
14039 }
14040
14041 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4_strided_cn) {
14042 TEST_REQUIRES_X86_SSE41;
14043 for (uint32_t n = 8; n <= 12; n += 4) {
14044 for (size_t k = 1; k <= 40; k += 9) {
14045 GemmMicrokernelTester()
14046 .mr(2)
14047 .nr(4)
14048 .kr(2)
14049 .sr(1)
14050 .m(2)
14051 .n(n)
14052 .k(k)
14053 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080014054 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014055 }
14056 }
14057 }
14058
14059 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4_strided_a) {
14060 TEST_REQUIRES_X86_SSE41;
14061 for (uint32_t n = 8; n <= 12; n += 4) {
14062 for (size_t k = 1; k <= 40; k += 9) {
14063 GemmMicrokernelTester()
14064 .mr(2)
14065 .nr(4)
14066 .kr(2)
14067 .sr(1)
14068 .m(2)
14069 .n(n)
14070 .k(k)
14071 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080014072 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014073 }
14074 }
14075 }
14076
14077 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4_subtile) {
14078 TEST_REQUIRES_X86_SSE41;
14079 for (uint32_t n = 8; n <= 12; n += 4) {
14080 for (size_t k = 1; k <= 40; k += 9) {
14081 for (uint32_t m = 1; m <= 2; m++) {
14082 GemmMicrokernelTester()
14083 .mr(2)
14084 .nr(4)
14085 .kr(2)
14086 .sr(1)
14087 .m(m)
14088 .n(n)
14089 .k(k)
14090 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014091 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014092 }
14093 }
14094 }
14095 }
14096
14097 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, strided_cm_subtile) {
14098 TEST_REQUIRES_X86_SSE41;
14099 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014100 for (uint32_t n = 1; n <= 4; n++) {
14101 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070014102 GemmMicrokernelTester()
14103 .mr(2)
14104 .nr(4)
14105 .kr(2)
14106 .sr(1)
14107 .m(m)
14108 .n(n)
14109 .k(k)
14110 .cm_stride(7)
14111 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014112 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014113 }
14114 }
14115 }
14116 }
14117
14118 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, qmin) {
14119 TEST_REQUIRES_X86_SSE41;
14120 GemmMicrokernelTester()
14121 .mr(2)
14122 .nr(4)
14123 .kr(2)
14124 .sr(1)
14125 .m(2)
14126 .n(4)
14127 .k(8)
14128 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014129 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014130 }
14131
14132 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, qmax) {
14133 TEST_REQUIRES_X86_SSE41;
14134 GemmMicrokernelTester()
14135 .mr(2)
14136 .nr(4)
14137 .kr(2)
14138 .sr(1)
14139 .m(2)
14140 .n(4)
14141 .k(8)
14142 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014143 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014144 }
14145
14146 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD64, strided_cm) {
14147 TEST_REQUIRES_X86_SSE41;
14148 GemmMicrokernelTester()
14149 .mr(2)
14150 .nr(4)
14151 .kr(2)
14152 .sr(1)
14153 .m(2)
14154 .n(4)
14155 .k(8)
14156 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080014157 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014158 }
14159#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14160
14161
14162#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanc46e6712021-06-01 19:00:16 -070014163 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8) {
14164 TEST_REQUIRES_X86_SSE41;
14165 GemmMicrokernelTester()
14166 .mr(4)
14167 .nr(4)
14168 .kr(2)
14169 .sr(1)
14170 .m(4)
14171 .n(4)
14172 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080014173 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014174 }
14175
14176 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, strided_cn) {
14177 TEST_REQUIRES_X86_SSE41;
14178 GemmMicrokernelTester()
14179 .mr(4)
14180 .nr(4)
14181 .kr(2)
14182 .sr(1)
14183 .m(4)
14184 .n(4)
14185 .k(8)
14186 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080014187 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014188 }
14189
14190 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_strided_a) {
14191 TEST_REQUIRES_X86_SSE41;
14192 GemmMicrokernelTester()
14193 .mr(4)
14194 .nr(4)
14195 .kr(2)
14196 .sr(1)
14197 .m(4)
14198 .n(4)
14199 .k(8)
14200 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014201 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014202 }
14203
14204 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_subtile) {
14205 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014206 for (uint32_t n = 1; n <= 4; n++) {
14207 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070014208 GemmMicrokernelTester()
14209 .mr(4)
14210 .nr(4)
14211 .kr(2)
14212 .sr(1)
14213 .m(m)
14214 .n(n)
14215 .k(8)
14216 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014217 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014218 }
14219 }
14220 }
14221
14222 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_subtile_m) {
14223 TEST_REQUIRES_X86_SSE41;
14224 for (uint32_t m = 1; m <= 4; m++) {
14225 GemmMicrokernelTester()
14226 .mr(4)
14227 .nr(4)
14228 .kr(2)
14229 .sr(1)
14230 .m(m)
14231 .n(4)
14232 .k(8)
14233 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014234 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014235 }
14236 }
14237
14238 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_subtile_n) {
14239 TEST_REQUIRES_X86_SSE41;
14240 for (uint32_t n = 1; n <= 4; n++) {
14241 GemmMicrokernelTester()
14242 .mr(4)
14243 .nr(4)
14244 .kr(2)
14245 .sr(1)
14246 .m(4)
14247 .n(n)
14248 .k(8)
14249 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014250 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014251 }
14252 }
14253
14254 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_lt_8) {
14255 TEST_REQUIRES_X86_SSE41;
14256 for (size_t k = 1; k < 8; k++) {
14257 GemmMicrokernelTester()
14258 .mr(4)
14259 .nr(4)
14260 .kr(2)
14261 .sr(1)
14262 .m(4)
14263 .n(4)
14264 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014265 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014266 }
14267 }
14268
14269 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_lt_8_strided_a) {
14270 TEST_REQUIRES_X86_SSE41;
14271 for (size_t k = 1; k < 8; k++) {
14272 GemmMicrokernelTester()
14273 .mr(4)
14274 .nr(4)
14275 .kr(2)
14276 .sr(1)
14277 .m(4)
14278 .n(4)
14279 .k(k)
14280 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014281 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014282 }
14283 }
14284
14285 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_lt_8_subtile) {
14286 TEST_REQUIRES_X86_SSE41;
14287 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014288 for (uint32_t n = 1; n <= 4; n++) {
14289 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070014290 GemmMicrokernelTester()
14291 .mr(4)
14292 .nr(4)
14293 .kr(2)
14294 .sr(1)
14295 .m(m)
14296 .n(n)
14297 .k(k)
14298 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014299 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014300 }
14301 }
14302 }
14303 }
14304
14305 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_gt_8) {
14306 TEST_REQUIRES_X86_SSE41;
14307 for (size_t k = 9; k < 16; k++) {
14308 GemmMicrokernelTester()
14309 .mr(4)
14310 .nr(4)
14311 .kr(2)
14312 .sr(1)
14313 .m(4)
14314 .n(4)
14315 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014316 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014317 }
14318 }
14319
14320 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_gt_8_strided_a) {
14321 TEST_REQUIRES_X86_SSE41;
14322 for (size_t k = 9; k < 16; k++) {
14323 GemmMicrokernelTester()
14324 .mr(4)
14325 .nr(4)
14326 .kr(2)
14327 .sr(1)
14328 .m(4)
14329 .n(4)
14330 .k(k)
14331 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080014332 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014333 }
14334 }
14335
14336 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_gt_8_subtile) {
14337 TEST_REQUIRES_X86_SSE41;
14338 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014339 for (uint32_t n = 1; n <= 4; n++) {
14340 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070014341 GemmMicrokernelTester()
14342 .mr(4)
14343 .nr(4)
14344 .kr(2)
14345 .sr(1)
14346 .m(m)
14347 .n(n)
14348 .k(k)
14349 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014350 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014351 }
14352 }
14353 }
14354 }
14355
14356 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_div_8) {
14357 TEST_REQUIRES_X86_SSE41;
14358 for (size_t k = 16; k <= 80; k += 8) {
14359 GemmMicrokernelTester()
14360 .mr(4)
14361 .nr(4)
14362 .kr(2)
14363 .sr(1)
14364 .m(4)
14365 .n(4)
14366 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014367 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014368 }
14369 }
14370
14371 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_div_8_strided_a) {
14372 TEST_REQUIRES_X86_SSE41;
14373 for (size_t k = 16; k <= 80; k += 8) {
14374 GemmMicrokernelTester()
14375 .mr(4)
14376 .nr(4)
14377 .kr(2)
14378 .sr(1)
14379 .m(4)
14380 .n(4)
14381 .k(k)
14382 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080014383 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014384 }
14385 }
14386
14387 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_div_8_subtile) {
14388 TEST_REQUIRES_X86_SSE41;
14389 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014390 for (uint32_t n = 1; n <= 4; n++) {
14391 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070014392 GemmMicrokernelTester()
14393 .mr(4)
14394 .nr(4)
14395 .kr(2)
14396 .sr(1)
14397 .m(m)
14398 .n(n)
14399 .k(k)
14400 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014401 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014402 }
14403 }
14404 }
14405 }
14406
14407 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4) {
14408 TEST_REQUIRES_X86_SSE41;
14409 for (uint32_t n = 5; n < 8; n++) {
14410 for (size_t k = 1; k <= 40; k += 9) {
14411 GemmMicrokernelTester()
14412 .mr(4)
14413 .nr(4)
14414 .kr(2)
14415 .sr(1)
14416 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014417 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070014418 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014419 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014420 }
14421 }
14422 }
14423
14424 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4_strided_cn) {
14425 TEST_REQUIRES_X86_SSE41;
14426 for (uint32_t n = 5; n < 8; n++) {
14427 for (size_t k = 1; k <= 40; k += 9) {
14428 GemmMicrokernelTester()
14429 .mr(4)
14430 .nr(4)
14431 .kr(2)
14432 .sr(1)
14433 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014434 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070014435 .k(k)
14436 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080014437 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014438 }
14439 }
14440 }
14441
14442 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4_strided_a) {
14443 TEST_REQUIRES_X86_SSE41;
14444 for (uint32_t n = 5; n < 8; n++) {
14445 for (size_t k = 1; k <= 40; k += 9) {
14446 GemmMicrokernelTester()
14447 .mr(4)
14448 .nr(4)
14449 .kr(2)
14450 .sr(1)
14451 .m(4)
14452 .n(n)
14453 .k(k)
14454 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080014455 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014456 }
14457 }
14458 }
14459
14460 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4_subtile) {
14461 TEST_REQUIRES_X86_SSE41;
14462 for (uint32_t n = 5; n < 8; n++) {
14463 for (size_t k = 1; k <= 40; k += 9) {
14464 for (uint32_t m = 1; m <= 4; m++) {
14465 GemmMicrokernelTester()
14466 .mr(4)
14467 .nr(4)
14468 .kr(2)
14469 .sr(1)
14470 .m(m)
14471 .n(n)
14472 .k(k)
14473 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014474 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014475 }
14476 }
14477 }
14478 }
14479
14480 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4) {
14481 TEST_REQUIRES_X86_SSE41;
14482 for (uint32_t n = 8; n <= 12; n += 4) {
14483 for (size_t k = 1; k <= 40; k += 9) {
14484 GemmMicrokernelTester()
14485 .mr(4)
14486 .nr(4)
14487 .kr(2)
14488 .sr(1)
14489 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014490 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070014491 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014492 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014493 }
14494 }
14495 }
14496
14497 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4_strided_cn) {
14498 TEST_REQUIRES_X86_SSE41;
14499 for (uint32_t n = 8; n <= 12; n += 4) {
14500 for (size_t k = 1; k <= 40; k += 9) {
14501 GemmMicrokernelTester()
14502 .mr(4)
14503 .nr(4)
14504 .kr(2)
14505 .sr(1)
14506 .m(4)
14507 .n(n)
14508 .k(k)
14509 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080014510 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014511 }
14512 }
14513 }
14514
14515 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4_strided_a) {
14516 TEST_REQUIRES_X86_SSE41;
14517 for (uint32_t n = 8; n <= 12; n += 4) {
14518 for (size_t k = 1; k <= 40; k += 9) {
14519 GemmMicrokernelTester()
14520 .mr(4)
14521 .nr(4)
14522 .kr(2)
14523 .sr(1)
14524 .m(4)
14525 .n(n)
14526 .k(k)
14527 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080014528 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014529 }
14530 }
14531 }
14532
14533 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4_subtile) {
14534 TEST_REQUIRES_X86_SSE41;
14535 for (uint32_t n = 8; n <= 12; n += 4) {
14536 for (size_t k = 1; k <= 40; k += 9) {
14537 for (uint32_t m = 1; m <= 4; m++) {
14538 GemmMicrokernelTester()
14539 .mr(4)
14540 .nr(4)
14541 .kr(2)
14542 .sr(1)
14543 .m(m)
14544 .n(n)
14545 .k(k)
14546 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014547 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014548 }
14549 }
14550 }
14551 }
14552
14553 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, strided_cm_subtile) {
14554 TEST_REQUIRES_X86_SSE41;
14555 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014556 for (uint32_t n = 1; n <= 4; n++) {
14557 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070014558 GemmMicrokernelTester()
14559 .mr(4)
14560 .nr(4)
14561 .kr(2)
14562 .sr(1)
14563 .m(m)
14564 .n(n)
14565 .k(k)
14566 .cm_stride(7)
14567 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014568 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014569 }
14570 }
14571 }
14572 }
14573
14574 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, qmin) {
14575 TEST_REQUIRES_X86_SSE41;
14576 GemmMicrokernelTester()
14577 .mr(4)
14578 .nr(4)
14579 .kr(2)
14580 .sr(1)
14581 .m(4)
14582 .n(4)
14583 .k(8)
14584 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014585 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014586 }
14587
14588 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, qmax) {
14589 TEST_REQUIRES_X86_SSE41;
14590 GemmMicrokernelTester()
14591 .mr(4)
14592 .nr(4)
14593 .kr(2)
14594 .sr(1)
14595 .m(4)
14596 .n(4)
14597 .k(8)
14598 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014599 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014600 }
14601
14602 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD64, strided_cm) {
14603 TEST_REQUIRES_X86_SSE41;
14604 GemmMicrokernelTester()
14605 .mr(4)
14606 .nr(4)
14607 .kr(2)
14608 .sr(1)
14609 .m(4)
14610 .n(4)
14611 .k(8)
14612 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080014613 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014614 }
14615#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14616
14617
14618#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanc46e6712021-06-01 19:00:16 -070014619 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8) {
14620 TEST_REQUIRES_X86_AVX;
14621 GemmMicrokernelTester()
14622 .mr(2)
14623 .nr(4)
14624 .kr(2)
14625 .sr(1)
14626 .m(2)
14627 .n(4)
14628 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080014629 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014630 }
14631
14632 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cn) {
14633 TEST_REQUIRES_X86_AVX;
14634 GemmMicrokernelTester()
14635 .mr(2)
14636 .nr(4)
14637 .kr(2)
14638 .sr(1)
14639 .m(2)
14640 .n(4)
14641 .k(8)
14642 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080014643 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014644 }
14645
14646 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_strided_a) {
14647 TEST_REQUIRES_X86_AVX;
14648 GemmMicrokernelTester()
14649 .mr(2)
14650 .nr(4)
14651 .kr(2)
14652 .sr(1)
14653 .m(2)
14654 .n(4)
14655 .k(8)
14656 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014657 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014658 }
14659
14660 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile) {
14661 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014662 for (uint32_t n = 1; n <= 4; n++) {
14663 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070014664 GemmMicrokernelTester()
14665 .mr(2)
14666 .nr(4)
14667 .kr(2)
14668 .sr(1)
14669 .m(m)
14670 .n(n)
14671 .k(8)
14672 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014673 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014674 }
14675 }
14676 }
14677
14678 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile_m) {
14679 TEST_REQUIRES_X86_AVX;
14680 for (uint32_t m = 1; m <= 2; m++) {
14681 GemmMicrokernelTester()
14682 .mr(2)
14683 .nr(4)
14684 .kr(2)
14685 .sr(1)
14686 .m(m)
14687 .n(4)
14688 .k(8)
14689 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014690 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014691 }
14692 }
14693
14694 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile_n) {
14695 TEST_REQUIRES_X86_AVX;
14696 for (uint32_t n = 1; n <= 4; n++) {
14697 GemmMicrokernelTester()
14698 .mr(2)
14699 .nr(4)
14700 .kr(2)
14701 .sr(1)
14702 .m(2)
14703 .n(n)
14704 .k(8)
14705 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014706 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014707 }
14708 }
14709
14710 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8) {
14711 TEST_REQUIRES_X86_AVX;
14712 for (size_t k = 1; k < 8; k++) {
14713 GemmMicrokernelTester()
14714 .mr(2)
14715 .nr(4)
14716 .kr(2)
14717 .sr(1)
14718 .m(2)
14719 .n(4)
14720 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014721 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014722 }
14723 }
14724
14725 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8_strided_a) {
14726 TEST_REQUIRES_X86_AVX;
14727 for (size_t k = 1; k < 8; k++) {
14728 GemmMicrokernelTester()
14729 .mr(2)
14730 .nr(4)
14731 .kr(2)
14732 .sr(1)
14733 .m(2)
14734 .n(4)
14735 .k(k)
14736 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014737 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014738 }
14739 }
14740
14741 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8_subtile) {
14742 TEST_REQUIRES_X86_AVX;
14743 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014744 for (uint32_t n = 1; n <= 4; n++) {
14745 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070014746 GemmMicrokernelTester()
14747 .mr(2)
14748 .nr(4)
14749 .kr(2)
14750 .sr(1)
14751 .m(m)
14752 .n(n)
14753 .k(k)
14754 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014755 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014756 }
14757 }
14758 }
14759 }
14760
14761 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8) {
14762 TEST_REQUIRES_X86_AVX;
14763 for (size_t k = 9; k < 16; k++) {
14764 GemmMicrokernelTester()
14765 .mr(2)
14766 .nr(4)
14767 .kr(2)
14768 .sr(1)
14769 .m(2)
14770 .n(4)
14771 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014772 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014773 }
14774 }
14775
14776 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8_strided_a) {
14777 TEST_REQUIRES_X86_AVX;
14778 for (size_t k = 9; k < 16; k++) {
14779 GemmMicrokernelTester()
14780 .mr(2)
14781 .nr(4)
14782 .kr(2)
14783 .sr(1)
14784 .m(2)
14785 .n(4)
14786 .k(k)
14787 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080014788 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014789 }
14790 }
14791
14792 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8_subtile) {
14793 TEST_REQUIRES_X86_AVX;
14794 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014795 for (uint32_t n = 1; n <= 4; n++) {
14796 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070014797 GemmMicrokernelTester()
14798 .mr(2)
14799 .nr(4)
14800 .kr(2)
14801 .sr(1)
14802 .m(m)
14803 .n(n)
14804 .k(k)
14805 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014806 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014807 }
14808 }
14809 }
14810 }
14811
14812 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8) {
14813 TEST_REQUIRES_X86_AVX;
14814 for (size_t k = 16; k <= 80; k += 8) {
14815 GemmMicrokernelTester()
14816 .mr(2)
14817 .nr(4)
14818 .kr(2)
14819 .sr(1)
14820 .m(2)
14821 .n(4)
14822 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014823 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014824 }
14825 }
14826
14827 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8_strided_a) {
14828 TEST_REQUIRES_X86_AVX;
14829 for (size_t k = 16; k <= 80; k += 8) {
14830 GemmMicrokernelTester()
14831 .mr(2)
14832 .nr(4)
14833 .kr(2)
14834 .sr(1)
14835 .m(2)
14836 .n(4)
14837 .k(k)
14838 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080014839 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014840 }
14841 }
14842
14843 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8_subtile) {
14844 TEST_REQUIRES_X86_AVX;
14845 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014846 for (uint32_t n = 1; n <= 4; n++) {
14847 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070014848 GemmMicrokernelTester()
14849 .mr(2)
14850 .nr(4)
14851 .kr(2)
14852 .sr(1)
14853 .m(m)
14854 .n(n)
14855 .k(k)
14856 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014857 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014858 }
14859 }
14860 }
14861 }
14862
14863 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4) {
14864 TEST_REQUIRES_X86_AVX;
14865 for (uint32_t n = 5; n < 8; n++) {
14866 for (size_t k = 1; k <= 40; k += 9) {
14867 GemmMicrokernelTester()
14868 .mr(2)
14869 .nr(4)
14870 .kr(2)
14871 .sr(1)
14872 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014873 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070014874 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014875 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014876 }
14877 }
14878 }
14879
14880 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_strided_cn) {
14881 TEST_REQUIRES_X86_AVX;
14882 for (uint32_t n = 5; n < 8; n++) {
14883 for (size_t k = 1; k <= 40; k += 9) {
14884 GemmMicrokernelTester()
14885 .mr(2)
14886 .nr(4)
14887 .kr(2)
14888 .sr(1)
14889 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014890 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070014891 .k(k)
14892 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080014893 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014894 }
14895 }
14896 }
14897
14898 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_strided_a) {
14899 TEST_REQUIRES_X86_AVX;
14900 for (uint32_t n = 5; n < 8; n++) {
14901 for (size_t k = 1; k <= 40; k += 9) {
14902 GemmMicrokernelTester()
14903 .mr(2)
14904 .nr(4)
14905 .kr(2)
14906 .sr(1)
14907 .m(2)
14908 .n(n)
14909 .k(k)
14910 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080014911 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014912 }
14913 }
14914 }
14915
14916 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_subtile) {
14917 TEST_REQUIRES_X86_AVX;
14918 for (uint32_t n = 5; n < 8; n++) {
14919 for (size_t k = 1; k <= 40; k += 9) {
14920 for (uint32_t m = 1; m <= 2; m++) {
14921 GemmMicrokernelTester()
14922 .mr(2)
14923 .nr(4)
14924 .kr(2)
14925 .sr(1)
14926 .m(m)
14927 .n(n)
14928 .k(k)
14929 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014930 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014931 }
14932 }
14933 }
14934 }
14935
14936 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4) {
14937 TEST_REQUIRES_X86_AVX;
14938 for (uint32_t n = 8; n <= 12; n += 4) {
14939 for (size_t k = 1; k <= 40; k += 9) {
14940 GemmMicrokernelTester()
14941 .mr(2)
14942 .nr(4)
14943 .kr(2)
14944 .sr(1)
14945 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014946 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070014947 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014948 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014949 }
14950 }
14951 }
14952
14953 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_strided_cn) {
14954 TEST_REQUIRES_X86_AVX;
14955 for (uint32_t n = 8; n <= 12; n += 4) {
14956 for (size_t k = 1; k <= 40; k += 9) {
14957 GemmMicrokernelTester()
14958 .mr(2)
14959 .nr(4)
14960 .kr(2)
14961 .sr(1)
14962 .m(2)
14963 .n(n)
14964 .k(k)
14965 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080014966 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014967 }
14968 }
14969 }
14970
14971 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_strided_a) {
14972 TEST_REQUIRES_X86_AVX;
14973 for (uint32_t n = 8; n <= 12; n += 4) {
14974 for (size_t k = 1; k <= 40; k += 9) {
14975 GemmMicrokernelTester()
14976 .mr(2)
14977 .nr(4)
14978 .kr(2)
14979 .sr(1)
14980 .m(2)
14981 .n(n)
14982 .k(k)
14983 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080014984 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070014985 }
14986 }
14987 }
14988
14989 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_subtile) {
14990 TEST_REQUIRES_X86_AVX;
14991 for (uint32_t n = 8; n <= 12; n += 4) {
14992 for (size_t k = 1; k <= 40; k += 9) {
14993 for (uint32_t m = 1; m <= 2; m++) {
14994 GemmMicrokernelTester()
14995 .mr(2)
14996 .nr(4)
14997 .kr(2)
14998 .sr(1)
14999 .m(m)
15000 .n(n)
15001 .k(k)
15002 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015003 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015004 }
15005 }
15006 }
15007 }
15008
15009 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cm_subtile) {
15010 TEST_REQUIRES_X86_AVX;
15011 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015012 for (uint32_t n = 1; n <= 4; n++) {
15013 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015014 GemmMicrokernelTester()
15015 .mr(2)
15016 .nr(4)
15017 .kr(2)
15018 .sr(1)
15019 .m(m)
15020 .n(n)
15021 .k(k)
15022 .cm_stride(7)
15023 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015024 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015025 }
15026 }
15027 }
15028 }
15029
15030 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, qmin) {
15031 TEST_REQUIRES_X86_AVX;
15032 GemmMicrokernelTester()
15033 .mr(2)
15034 .nr(4)
15035 .kr(2)
15036 .sr(1)
15037 .m(2)
15038 .n(4)
15039 .k(8)
15040 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015041 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015042 }
15043
15044 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, qmax) {
15045 TEST_REQUIRES_X86_AVX;
15046 GemmMicrokernelTester()
15047 .mr(2)
15048 .nr(4)
15049 .kr(2)
15050 .sr(1)
15051 .m(2)
15052 .n(4)
15053 .k(8)
15054 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015055 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015056 }
15057
15058 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cm) {
15059 TEST_REQUIRES_X86_AVX;
15060 GemmMicrokernelTester()
15061 .mr(2)
15062 .nr(4)
15063 .kr(2)
15064 .sr(1)
15065 .m(2)
15066 .n(4)
15067 .k(8)
15068 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015069 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015070 }
15071#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15072
15073
15074#if XNN_ARCH_X86 || XNN_ARCH_X86_64
15075 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8) {
15076 TEST_REQUIRES_X86_AVX;
15077 GemmMicrokernelTester()
15078 .mr(3)
15079 .nr(4)
15080 .kr(2)
15081 .sr(1)
15082 .m(3)
15083 .n(4)
15084 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080015085 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015086 }
15087
15088 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cn) {
15089 TEST_REQUIRES_X86_AVX;
15090 GemmMicrokernelTester()
15091 .mr(3)
15092 .nr(4)
15093 .kr(2)
15094 .sr(1)
15095 .m(3)
15096 .n(4)
15097 .k(8)
15098 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015099 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015100 }
15101
15102 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_strided_a) {
15103 TEST_REQUIRES_X86_AVX;
15104 GemmMicrokernelTester()
15105 .mr(3)
15106 .nr(4)
15107 .kr(2)
15108 .sr(1)
15109 .m(3)
15110 .n(4)
15111 .k(8)
15112 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080015113 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015114 }
15115
15116 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile) {
15117 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015118 for (uint32_t n = 1; n <= 4; n++) {
15119 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015120 GemmMicrokernelTester()
15121 .mr(3)
15122 .nr(4)
15123 .kr(2)
15124 .sr(1)
15125 .m(m)
15126 .n(n)
15127 .k(8)
15128 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015129 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015130 }
15131 }
15132 }
15133
15134 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile_m) {
15135 TEST_REQUIRES_X86_AVX;
15136 for (uint32_t m = 1; m <= 3; m++) {
15137 GemmMicrokernelTester()
15138 .mr(3)
15139 .nr(4)
15140 .kr(2)
15141 .sr(1)
15142 .m(m)
15143 .n(4)
15144 .k(8)
15145 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015146 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015147 }
15148 }
15149
15150 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile_n) {
15151 TEST_REQUIRES_X86_AVX;
15152 for (uint32_t n = 1; n <= 4; n++) {
15153 GemmMicrokernelTester()
15154 .mr(3)
15155 .nr(4)
15156 .kr(2)
15157 .sr(1)
15158 .m(3)
15159 .n(n)
15160 .k(8)
15161 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015162 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015163 }
15164 }
15165
15166 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8) {
15167 TEST_REQUIRES_X86_AVX;
15168 for (size_t k = 1; k < 8; k++) {
15169 GemmMicrokernelTester()
15170 .mr(3)
15171 .nr(4)
15172 .kr(2)
15173 .sr(1)
15174 .m(3)
15175 .n(4)
15176 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015177 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015178 }
15179 }
15180
15181 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8_strided_a) {
15182 TEST_REQUIRES_X86_AVX;
15183 for (size_t k = 1; k < 8; k++) {
15184 GemmMicrokernelTester()
15185 .mr(3)
15186 .nr(4)
15187 .kr(2)
15188 .sr(1)
15189 .m(3)
15190 .n(4)
15191 .k(k)
15192 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080015193 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015194 }
15195 }
15196
15197 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8_subtile) {
15198 TEST_REQUIRES_X86_AVX;
15199 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015200 for (uint32_t n = 1; n <= 4; n++) {
15201 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015202 GemmMicrokernelTester()
15203 .mr(3)
15204 .nr(4)
15205 .kr(2)
15206 .sr(1)
15207 .m(m)
15208 .n(n)
15209 .k(k)
15210 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015211 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015212 }
15213 }
15214 }
15215 }
15216
15217 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8) {
15218 TEST_REQUIRES_X86_AVX;
15219 for (size_t k = 9; k < 16; k++) {
15220 GemmMicrokernelTester()
15221 .mr(3)
15222 .nr(4)
15223 .kr(2)
15224 .sr(1)
15225 .m(3)
15226 .n(4)
15227 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015228 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015229 }
15230 }
15231
15232 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8_strided_a) {
15233 TEST_REQUIRES_X86_AVX;
15234 for (size_t k = 9; k < 16; k++) {
15235 GemmMicrokernelTester()
15236 .mr(3)
15237 .nr(4)
15238 .kr(2)
15239 .sr(1)
15240 .m(3)
15241 .n(4)
15242 .k(k)
15243 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080015244 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015245 }
15246 }
15247
15248 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8_subtile) {
15249 TEST_REQUIRES_X86_AVX;
15250 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015251 for (uint32_t n = 1; n <= 4; n++) {
15252 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015253 GemmMicrokernelTester()
15254 .mr(3)
15255 .nr(4)
15256 .kr(2)
15257 .sr(1)
15258 .m(m)
15259 .n(n)
15260 .k(k)
15261 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015262 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015263 }
15264 }
15265 }
15266 }
15267
15268 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8) {
15269 TEST_REQUIRES_X86_AVX;
15270 for (size_t k = 16; k <= 80; k += 8) {
15271 GemmMicrokernelTester()
15272 .mr(3)
15273 .nr(4)
15274 .kr(2)
15275 .sr(1)
15276 .m(3)
15277 .n(4)
15278 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015279 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015280 }
15281 }
15282
15283 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8_strided_a) {
15284 TEST_REQUIRES_X86_AVX;
15285 for (size_t k = 16; k <= 80; k += 8) {
15286 GemmMicrokernelTester()
15287 .mr(3)
15288 .nr(4)
15289 .kr(2)
15290 .sr(1)
15291 .m(3)
15292 .n(4)
15293 .k(k)
15294 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080015295 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015296 }
15297 }
15298
15299 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8_subtile) {
15300 TEST_REQUIRES_X86_AVX;
15301 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015302 for (uint32_t n = 1; n <= 4; n++) {
15303 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015304 GemmMicrokernelTester()
15305 .mr(3)
15306 .nr(4)
15307 .kr(2)
15308 .sr(1)
15309 .m(m)
15310 .n(n)
15311 .k(k)
15312 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015313 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015314 }
15315 }
15316 }
15317 }
15318
15319 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4) {
15320 TEST_REQUIRES_X86_AVX;
15321 for (uint32_t n = 5; n < 8; n++) {
15322 for (size_t k = 1; k <= 40; k += 9) {
15323 GemmMicrokernelTester()
15324 .mr(3)
15325 .nr(4)
15326 .kr(2)
15327 .sr(1)
15328 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015329 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070015330 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015331 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015332 }
15333 }
15334 }
15335
15336 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_strided_cn) {
15337 TEST_REQUIRES_X86_AVX;
15338 for (uint32_t n = 5; n < 8; n++) {
15339 for (size_t k = 1; k <= 40; k += 9) {
15340 GemmMicrokernelTester()
15341 .mr(3)
15342 .nr(4)
15343 .kr(2)
15344 .sr(1)
15345 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015346 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070015347 .k(k)
15348 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015349 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015350 }
15351 }
15352 }
15353
15354 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_strided_a) {
15355 TEST_REQUIRES_X86_AVX;
15356 for (uint32_t n = 5; n < 8; n++) {
15357 for (size_t k = 1; k <= 40; k += 9) {
15358 GemmMicrokernelTester()
15359 .mr(3)
15360 .nr(4)
15361 .kr(2)
15362 .sr(1)
15363 .m(3)
15364 .n(n)
15365 .k(k)
15366 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080015367 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015368 }
15369 }
15370 }
15371
15372 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_subtile) {
15373 TEST_REQUIRES_X86_AVX;
15374 for (uint32_t n = 5; n < 8; n++) {
15375 for (size_t k = 1; k <= 40; k += 9) {
15376 for (uint32_t m = 1; m <= 3; m++) {
15377 GemmMicrokernelTester()
15378 .mr(3)
15379 .nr(4)
15380 .kr(2)
15381 .sr(1)
15382 .m(m)
15383 .n(n)
15384 .k(k)
15385 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015386 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015387 }
15388 }
15389 }
15390 }
15391
15392 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4) {
15393 TEST_REQUIRES_X86_AVX;
15394 for (uint32_t n = 8; n <= 12; n += 4) {
15395 for (size_t k = 1; k <= 40; k += 9) {
15396 GemmMicrokernelTester()
15397 .mr(3)
15398 .nr(4)
15399 .kr(2)
15400 .sr(1)
15401 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015402 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070015403 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015404 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015405 }
15406 }
15407 }
15408
15409 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_strided_cn) {
15410 TEST_REQUIRES_X86_AVX;
15411 for (uint32_t n = 8; n <= 12; n += 4) {
15412 for (size_t k = 1; k <= 40; k += 9) {
15413 GemmMicrokernelTester()
15414 .mr(3)
15415 .nr(4)
15416 .kr(2)
15417 .sr(1)
15418 .m(3)
15419 .n(n)
15420 .k(k)
15421 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015422 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015423 }
15424 }
15425 }
15426
15427 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_strided_a) {
15428 TEST_REQUIRES_X86_AVX;
15429 for (uint32_t n = 8; n <= 12; n += 4) {
15430 for (size_t k = 1; k <= 40; k += 9) {
15431 GemmMicrokernelTester()
15432 .mr(3)
15433 .nr(4)
15434 .kr(2)
15435 .sr(1)
15436 .m(3)
15437 .n(n)
15438 .k(k)
15439 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080015440 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015441 }
15442 }
15443 }
15444
15445 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_subtile) {
15446 TEST_REQUIRES_X86_AVX;
15447 for (uint32_t n = 8; n <= 12; n += 4) {
15448 for (size_t k = 1; k <= 40; k += 9) {
15449 for (uint32_t m = 1; m <= 3; m++) {
15450 GemmMicrokernelTester()
15451 .mr(3)
15452 .nr(4)
15453 .kr(2)
15454 .sr(1)
15455 .m(m)
15456 .n(n)
15457 .k(k)
15458 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015459 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015460 }
15461 }
15462 }
15463 }
15464
15465 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cm_subtile) {
15466 TEST_REQUIRES_X86_AVX;
15467 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015468 for (uint32_t n = 1; n <= 4; n++) {
15469 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015470 GemmMicrokernelTester()
15471 .mr(3)
15472 .nr(4)
15473 .kr(2)
15474 .sr(1)
15475 .m(m)
15476 .n(n)
15477 .k(k)
15478 .cm_stride(7)
15479 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015480 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015481 }
15482 }
15483 }
15484 }
15485
15486 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, qmin) {
15487 TEST_REQUIRES_X86_AVX;
15488 GemmMicrokernelTester()
15489 .mr(3)
15490 .nr(4)
15491 .kr(2)
15492 .sr(1)
15493 .m(3)
15494 .n(4)
15495 .k(8)
15496 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015497 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015498 }
15499
15500 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, qmax) {
15501 TEST_REQUIRES_X86_AVX;
15502 GemmMicrokernelTester()
15503 .mr(3)
15504 .nr(4)
15505 .kr(2)
15506 .sr(1)
15507 .m(3)
15508 .n(4)
15509 .k(8)
15510 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015511 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015512 }
15513
15514 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cm) {
15515 TEST_REQUIRES_X86_AVX;
15516 GemmMicrokernelTester()
15517 .mr(3)
15518 .nr(4)
15519 .kr(2)
15520 .sr(1)
15521 .m(3)
15522 .n(4)
15523 .k(8)
15524 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015525 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015526 }
15527#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15528
15529
15530#if XNN_ARCH_X86 || XNN_ARCH_X86_64
15531 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8) {
15532 TEST_REQUIRES_X86_AVX;
15533 GemmMicrokernelTester()
15534 .mr(4)
15535 .nr(4)
15536 .kr(2)
15537 .sr(1)
15538 .m(4)
15539 .n(4)
15540 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080015541 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015542 }
15543
15544 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cn) {
15545 TEST_REQUIRES_X86_AVX;
15546 GemmMicrokernelTester()
15547 .mr(4)
15548 .nr(4)
15549 .kr(2)
15550 .sr(1)
15551 .m(4)
15552 .n(4)
15553 .k(8)
15554 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015555 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015556 }
15557
15558 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_strided_a) {
15559 TEST_REQUIRES_X86_AVX;
15560 GemmMicrokernelTester()
15561 .mr(4)
15562 .nr(4)
15563 .kr(2)
15564 .sr(1)
15565 .m(4)
15566 .n(4)
15567 .k(8)
15568 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080015569 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015570 }
15571
15572 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile) {
15573 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015574 for (uint32_t n = 1; n <= 4; n++) {
15575 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015576 GemmMicrokernelTester()
15577 .mr(4)
15578 .nr(4)
15579 .kr(2)
15580 .sr(1)
15581 .m(m)
15582 .n(n)
15583 .k(8)
15584 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015585 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015586 }
15587 }
15588 }
15589
15590 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile_m) {
15591 TEST_REQUIRES_X86_AVX;
15592 for (uint32_t m = 1; m <= 4; m++) {
15593 GemmMicrokernelTester()
15594 .mr(4)
15595 .nr(4)
15596 .kr(2)
15597 .sr(1)
15598 .m(m)
15599 .n(4)
15600 .k(8)
15601 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015602 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015603 }
15604 }
15605
15606 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile_n) {
15607 TEST_REQUIRES_X86_AVX;
15608 for (uint32_t n = 1; n <= 4; n++) {
15609 GemmMicrokernelTester()
15610 .mr(4)
15611 .nr(4)
15612 .kr(2)
15613 .sr(1)
15614 .m(4)
15615 .n(n)
15616 .k(8)
15617 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015618 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015619 }
15620 }
15621
15622 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8) {
15623 TEST_REQUIRES_X86_AVX;
15624 for (size_t k = 1; k < 8; k++) {
15625 GemmMicrokernelTester()
15626 .mr(4)
15627 .nr(4)
15628 .kr(2)
15629 .sr(1)
15630 .m(4)
15631 .n(4)
15632 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015633 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015634 }
15635 }
15636
15637 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8_strided_a) {
15638 TEST_REQUIRES_X86_AVX;
15639 for (size_t k = 1; k < 8; k++) {
15640 GemmMicrokernelTester()
15641 .mr(4)
15642 .nr(4)
15643 .kr(2)
15644 .sr(1)
15645 .m(4)
15646 .n(4)
15647 .k(k)
15648 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080015649 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015650 }
15651 }
15652
15653 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8_subtile) {
15654 TEST_REQUIRES_X86_AVX;
15655 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015656 for (uint32_t n = 1; n <= 4; n++) {
15657 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015658 GemmMicrokernelTester()
15659 .mr(4)
15660 .nr(4)
15661 .kr(2)
15662 .sr(1)
15663 .m(m)
15664 .n(n)
15665 .k(k)
15666 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015667 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015668 }
15669 }
15670 }
15671 }
15672
15673 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8) {
15674 TEST_REQUIRES_X86_AVX;
15675 for (size_t k = 9; k < 16; k++) {
15676 GemmMicrokernelTester()
15677 .mr(4)
15678 .nr(4)
15679 .kr(2)
15680 .sr(1)
15681 .m(4)
15682 .n(4)
15683 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015684 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015685 }
15686 }
15687
15688 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8_strided_a) {
15689 TEST_REQUIRES_X86_AVX;
15690 for (size_t k = 9; k < 16; k++) {
15691 GemmMicrokernelTester()
15692 .mr(4)
15693 .nr(4)
15694 .kr(2)
15695 .sr(1)
15696 .m(4)
15697 .n(4)
15698 .k(k)
15699 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080015700 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015701 }
15702 }
15703
15704 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8_subtile) {
15705 TEST_REQUIRES_X86_AVX;
15706 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015707 for (uint32_t n = 1; n <= 4; n++) {
15708 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015709 GemmMicrokernelTester()
15710 .mr(4)
15711 .nr(4)
15712 .kr(2)
15713 .sr(1)
15714 .m(m)
15715 .n(n)
15716 .k(k)
15717 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015718 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015719 }
15720 }
15721 }
15722 }
15723
15724 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8) {
15725 TEST_REQUIRES_X86_AVX;
15726 for (size_t k = 16; k <= 80; k += 8) {
15727 GemmMicrokernelTester()
15728 .mr(4)
15729 .nr(4)
15730 .kr(2)
15731 .sr(1)
15732 .m(4)
15733 .n(4)
15734 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015735 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015736 }
15737 }
15738
15739 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8_strided_a) {
15740 TEST_REQUIRES_X86_AVX;
15741 for (size_t k = 16; k <= 80; k += 8) {
15742 GemmMicrokernelTester()
15743 .mr(4)
15744 .nr(4)
15745 .kr(2)
15746 .sr(1)
15747 .m(4)
15748 .n(4)
15749 .k(k)
15750 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080015751 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015752 }
15753 }
15754
15755 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8_subtile) {
15756 TEST_REQUIRES_X86_AVX;
15757 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015758 for (uint32_t n = 1; n <= 4; n++) {
15759 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015760 GemmMicrokernelTester()
15761 .mr(4)
15762 .nr(4)
15763 .kr(2)
15764 .sr(1)
15765 .m(m)
15766 .n(n)
15767 .k(k)
15768 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015769 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015770 }
15771 }
15772 }
15773 }
15774
15775 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4) {
15776 TEST_REQUIRES_X86_AVX;
15777 for (uint32_t n = 5; n < 8; n++) {
15778 for (size_t k = 1; k <= 40; k += 9) {
15779 GemmMicrokernelTester()
15780 .mr(4)
15781 .nr(4)
15782 .kr(2)
15783 .sr(1)
15784 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015785 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070015786 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015787 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015788 }
15789 }
15790 }
15791
15792 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_strided_cn) {
15793 TEST_REQUIRES_X86_AVX;
15794 for (uint32_t n = 5; n < 8; n++) {
15795 for (size_t k = 1; k <= 40; k += 9) {
15796 GemmMicrokernelTester()
15797 .mr(4)
15798 .nr(4)
15799 .kr(2)
15800 .sr(1)
15801 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015802 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070015803 .k(k)
15804 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015805 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015806 }
15807 }
15808 }
15809
15810 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_strided_a) {
15811 TEST_REQUIRES_X86_AVX;
15812 for (uint32_t n = 5; n < 8; n++) {
15813 for (size_t k = 1; k <= 40; k += 9) {
15814 GemmMicrokernelTester()
15815 .mr(4)
15816 .nr(4)
15817 .kr(2)
15818 .sr(1)
15819 .m(4)
15820 .n(n)
15821 .k(k)
15822 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080015823 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015824 }
15825 }
15826 }
15827
15828 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_subtile) {
15829 TEST_REQUIRES_X86_AVX;
15830 for (uint32_t n = 5; n < 8; n++) {
15831 for (size_t k = 1; k <= 40; k += 9) {
15832 for (uint32_t m = 1; m <= 4; m++) {
15833 GemmMicrokernelTester()
15834 .mr(4)
15835 .nr(4)
15836 .kr(2)
15837 .sr(1)
15838 .m(m)
15839 .n(n)
15840 .k(k)
15841 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015842 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015843 }
15844 }
15845 }
15846 }
15847
15848 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4) {
15849 TEST_REQUIRES_X86_AVX;
15850 for (uint32_t n = 8; n <= 12; n += 4) {
15851 for (size_t k = 1; k <= 40; k += 9) {
15852 GemmMicrokernelTester()
15853 .mr(4)
15854 .nr(4)
15855 .kr(2)
15856 .sr(1)
15857 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015858 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070015859 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015860 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015861 }
15862 }
15863 }
15864
15865 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_strided_cn) {
15866 TEST_REQUIRES_X86_AVX;
15867 for (uint32_t n = 8; n <= 12; n += 4) {
15868 for (size_t k = 1; k <= 40; k += 9) {
15869 GemmMicrokernelTester()
15870 .mr(4)
15871 .nr(4)
15872 .kr(2)
15873 .sr(1)
15874 .m(4)
15875 .n(n)
15876 .k(k)
15877 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015878 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015879 }
15880 }
15881 }
15882
15883 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_strided_a) {
15884 TEST_REQUIRES_X86_AVX;
15885 for (uint32_t n = 8; n <= 12; n += 4) {
15886 for (size_t k = 1; k <= 40; k += 9) {
15887 GemmMicrokernelTester()
15888 .mr(4)
15889 .nr(4)
15890 .kr(2)
15891 .sr(1)
15892 .m(4)
15893 .n(n)
15894 .k(k)
15895 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080015896 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015897 }
15898 }
15899 }
15900
15901 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_subtile) {
15902 TEST_REQUIRES_X86_AVX;
15903 for (uint32_t n = 8; n <= 12; n += 4) {
15904 for (size_t k = 1; k <= 40; k += 9) {
15905 for (uint32_t m = 1; m <= 4; m++) {
15906 GemmMicrokernelTester()
15907 .mr(4)
15908 .nr(4)
15909 .kr(2)
15910 .sr(1)
15911 .m(m)
15912 .n(n)
15913 .k(k)
15914 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015915 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015916 }
15917 }
15918 }
15919 }
15920
15921 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cm_subtile) {
15922 TEST_REQUIRES_X86_AVX;
15923 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015924 for (uint32_t n = 1; n <= 4; n++) {
15925 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070015926 GemmMicrokernelTester()
15927 .mr(4)
15928 .nr(4)
15929 .kr(2)
15930 .sr(1)
15931 .m(m)
15932 .n(n)
15933 .k(k)
15934 .cm_stride(7)
15935 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015936 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015937 }
15938 }
15939 }
15940 }
15941
15942 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, qmin) {
15943 TEST_REQUIRES_X86_AVX;
15944 GemmMicrokernelTester()
15945 .mr(4)
15946 .nr(4)
15947 .kr(2)
15948 .sr(1)
15949 .m(4)
15950 .n(4)
15951 .k(8)
15952 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015953 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015954 }
15955
15956 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, qmax) {
15957 TEST_REQUIRES_X86_AVX;
15958 GemmMicrokernelTester()
15959 .mr(4)
15960 .nr(4)
15961 .kr(2)
15962 .sr(1)
15963 .m(4)
15964 .n(4)
15965 .k(8)
15966 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015967 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015968 }
15969
15970 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cm) {
15971 TEST_REQUIRES_X86_AVX;
15972 GemmMicrokernelTester()
15973 .mr(4)
15974 .nr(4)
15975 .kr(2)
15976 .sr(1)
15977 .m(4)
15978 .n(4)
15979 .k(8)
15980 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015981 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015982 }
15983#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15984
15985
15986#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanc46e6712021-06-01 19:00:16 -070015987 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8) {
15988 TEST_REQUIRES_X86_XOP;
15989 GemmMicrokernelTester()
15990 .mr(2)
15991 .nr(4)
15992 .kr(2)
15993 .sr(1)
15994 .m(2)
15995 .n(4)
15996 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080015997 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070015998 }
15999
16000 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cn) {
16001 TEST_REQUIRES_X86_XOP;
16002 GemmMicrokernelTester()
16003 .mr(2)
16004 .nr(4)
16005 .kr(2)
16006 .sr(1)
16007 .m(2)
16008 .n(4)
16009 .k(8)
16010 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016011 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016012 }
16013
16014 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_strided_a) {
16015 TEST_REQUIRES_X86_XOP;
16016 GemmMicrokernelTester()
16017 .mr(2)
16018 .nr(4)
16019 .kr(2)
16020 .sr(1)
16021 .m(2)
16022 .n(4)
16023 .k(8)
16024 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016025 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016026 }
16027
16028 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile) {
16029 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016030 for (uint32_t n = 1; n <= 4; n++) {
16031 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016032 GemmMicrokernelTester()
16033 .mr(2)
16034 .nr(4)
16035 .kr(2)
16036 .sr(1)
16037 .m(m)
16038 .n(n)
16039 .k(8)
16040 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016041 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016042 }
16043 }
16044 }
16045
16046 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile_m) {
16047 TEST_REQUIRES_X86_XOP;
16048 for (uint32_t m = 1; m <= 2; m++) {
16049 GemmMicrokernelTester()
16050 .mr(2)
16051 .nr(4)
16052 .kr(2)
16053 .sr(1)
16054 .m(m)
16055 .n(4)
16056 .k(8)
16057 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016058 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016059 }
16060 }
16061
16062 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile_n) {
16063 TEST_REQUIRES_X86_XOP;
16064 for (uint32_t n = 1; n <= 4; n++) {
16065 GemmMicrokernelTester()
16066 .mr(2)
16067 .nr(4)
16068 .kr(2)
16069 .sr(1)
16070 .m(2)
16071 .n(n)
16072 .k(8)
16073 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016074 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016075 }
16076 }
16077
16078 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8) {
16079 TEST_REQUIRES_X86_XOP;
16080 for (size_t k = 1; k < 8; k++) {
16081 GemmMicrokernelTester()
16082 .mr(2)
16083 .nr(4)
16084 .kr(2)
16085 .sr(1)
16086 .m(2)
16087 .n(4)
16088 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016089 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016090 }
16091 }
16092
16093 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8_strided_a) {
16094 TEST_REQUIRES_X86_XOP;
16095 for (size_t k = 1; k < 8; k++) {
16096 GemmMicrokernelTester()
16097 .mr(2)
16098 .nr(4)
16099 .kr(2)
16100 .sr(1)
16101 .m(2)
16102 .n(4)
16103 .k(k)
16104 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016105 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016106 }
16107 }
16108
16109 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8_subtile) {
16110 TEST_REQUIRES_X86_XOP;
16111 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016112 for (uint32_t n = 1; n <= 4; n++) {
16113 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016114 GemmMicrokernelTester()
16115 .mr(2)
16116 .nr(4)
16117 .kr(2)
16118 .sr(1)
16119 .m(m)
16120 .n(n)
16121 .k(k)
16122 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016123 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016124 }
16125 }
16126 }
16127 }
16128
16129 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8) {
16130 TEST_REQUIRES_X86_XOP;
16131 for (size_t k = 9; k < 16; k++) {
16132 GemmMicrokernelTester()
16133 .mr(2)
16134 .nr(4)
16135 .kr(2)
16136 .sr(1)
16137 .m(2)
16138 .n(4)
16139 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016140 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016141 }
16142 }
16143
16144 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8_strided_a) {
16145 TEST_REQUIRES_X86_XOP;
16146 for (size_t k = 9; k < 16; k++) {
16147 GemmMicrokernelTester()
16148 .mr(2)
16149 .nr(4)
16150 .kr(2)
16151 .sr(1)
16152 .m(2)
16153 .n(4)
16154 .k(k)
16155 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080016156 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016157 }
16158 }
16159
16160 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8_subtile) {
16161 TEST_REQUIRES_X86_XOP;
16162 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016163 for (uint32_t n = 1; n <= 4; n++) {
16164 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016165 GemmMicrokernelTester()
16166 .mr(2)
16167 .nr(4)
16168 .kr(2)
16169 .sr(1)
16170 .m(m)
16171 .n(n)
16172 .k(k)
16173 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016174 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016175 }
16176 }
16177 }
16178 }
16179
16180 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8) {
16181 TEST_REQUIRES_X86_XOP;
16182 for (size_t k = 16; k <= 80; k += 8) {
16183 GemmMicrokernelTester()
16184 .mr(2)
16185 .nr(4)
16186 .kr(2)
16187 .sr(1)
16188 .m(2)
16189 .n(4)
16190 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016191 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016192 }
16193 }
16194
16195 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8_strided_a) {
16196 TEST_REQUIRES_X86_XOP;
16197 for (size_t k = 16; k <= 80; k += 8) {
16198 GemmMicrokernelTester()
16199 .mr(2)
16200 .nr(4)
16201 .kr(2)
16202 .sr(1)
16203 .m(2)
16204 .n(4)
16205 .k(k)
16206 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080016207 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016208 }
16209 }
16210
16211 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8_subtile) {
16212 TEST_REQUIRES_X86_XOP;
16213 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016214 for (uint32_t n = 1; n <= 4; n++) {
16215 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016216 GemmMicrokernelTester()
16217 .mr(2)
16218 .nr(4)
16219 .kr(2)
16220 .sr(1)
16221 .m(m)
16222 .n(n)
16223 .k(k)
16224 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016225 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016226 }
16227 }
16228 }
16229 }
16230
16231 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4) {
16232 TEST_REQUIRES_X86_XOP;
16233 for (uint32_t n = 5; n < 8; n++) {
16234 for (size_t k = 1; k <= 40; k += 9) {
16235 GemmMicrokernelTester()
16236 .mr(2)
16237 .nr(4)
16238 .kr(2)
16239 .sr(1)
16240 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016241 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070016242 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016243 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016244 }
16245 }
16246 }
16247
16248 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_strided_cn) {
16249 TEST_REQUIRES_X86_XOP;
16250 for (uint32_t n = 5; n < 8; n++) {
16251 for (size_t k = 1; k <= 40; k += 9) {
16252 GemmMicrokernelTester()
16253 .mr(2)
16254 .nr(4)
16255 .kr(2)
16256 .sr(1)
16257 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016258 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070016259 .k(k)
16260 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016261 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016262 }
16263 }
16264 }
16265
16266 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_strided_a) {
16267 TEST_REQUIRES_X86_XOP;
16268 for (uint32_t n = 5; n < 8; n++) {
16269 for (size_t k = 1; k <= 40; k += 9) {
16270 GemmMicrokernelTester()
16271 .mr(2)
16272 .nr(4)
16273 .kr(2)
16274 .sr(1)
16275 .m(2)
16276 .n(n)
16277 .k(k)
16278 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080016279 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016280 }
16281 }
16282 }
16283
16284 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_subtile) {
16285 TEST_REQUIRES_X86_XOP;
16286 for (uint32_t n = 5; n < 8; n++) {
16287 for (size_t k = 1; k <= 40; k += 9) {
16288 for (uint32_t m = 1; m <= 2; m++) {
16289 GemmMicrokernelTester()
16290 .mr(2)
16291 .nr(4)
16292 .kr(2)
16293 .sr(1)
16294 .m(m)
16295 .n(n)
16296 .k(k)
16297 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016298 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016299 }
16300 }
16301 }
16302 }
16303
16304 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4) {
16305 TEST_REQUIRES_X86_XOP;
16306 for (uint32_t n = 8; n <= 12; n += 4) {
16307 for (size_t k = 1; k <= 40; k += 9) {
16308 GemmMicrokernelTester()
16309 .mr(2)
16310 .nr(4)
16311 .kr(2)
16312 .sr(1)
16313 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016314 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070016315 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016316 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016317 }
16318 }
16319 }
16320
16321 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_strided_cn) {
16322 TEST_REQUIRES_X86_XOP;
16323 for (uint32_t n = 8; n <= 12; n += 4) {
16324 for (size_t k = 1; k <= 40; k += 9) {
16325 GemmMicrokernelTester()
16326 .mr(2)
16327 .nr(4)
16328 .kr(2)
16329 .sr(1)
16330 .m(2)
16331 .n(n)
16332 .k(k)
16333 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016334 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016335 }
16336 }
16337 }
16338
16339 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_strided_a) {
16340 TEST_REQUIRES_X86_XOP;
16341 for (uint32_t n = 8; n <= 12; n += 4) {
16342 for (size_t k = 1; k <= 40; k += 9) {
16343 GemmMicrokernelTester()
16344 .mr(2)
16345 .nr(4)
16346 .kr(2)
16347 .sr(1)
16348 .m(2)
16349 .n(n)
16350 .k(k)
16351 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080016352 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016353 }
16354 }
16355 }
16356
16357 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_subtile) {
16358 TEST_REQUIRES_X86_XOP;
16359 for (uint32_t n = 8; n <= 12; n += 4) {
16360 for (size_t k = 1; k <= 40; k += 9) {
16361 for (uint32_t m = 1; m <= 2; m++) {
16362 GemmMicrokernelTester()
16363 .mr(2)
16364 .nr(4)
16365 .kr(2)
16366 .sr(1)
16367 .m(m)
16368 .n(n)
16369 .k(k)
16370 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016371 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016372 }
16373 }
16374 }
16375 }
16376
16377 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cm_subtile) {
16378 TEST_REQUIRES_X86_XOP;
16379 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016380 for (uint32_t n = 1; n <= 4; n++) {
16381 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016382 GemmMicrokernelTester()
16383 .mr(2)
16384 .nr(4)
16385 .kr(2)
16386 .sr(1)
16387 .m(m)
16388 .n(n)
16389 .k(k)
16390 .cm_stride(7)
16391 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016392 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016393 }
16394 }
16395 }
16396 }
16397
16398 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, qmin) {
16399 TEST_REQUIRES_X86_XOP;
16400 GemmMicrokernelTester()
16401 .mr(2)
16402 .nr(4)
16403 .kr(2)
16404 .sr(1)
16405 .m(2)
16406 .n(4)
16407 .k(8)
16408 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016409 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016410 }
16411
16412 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, qmax) {
16413 TEST_REQUIRES_X86_XOP;
16414 GemmMicrokernelTester()
16415 .mr(2)
16416 .nr(4)
16417 .kr(2)
16418 .sr(1)
16419 .m(2)
16420 .n(4)
16421 .k(8)
16422 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016423 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016424 }
16425
16426 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cm) {
16427 TEST_REQUIRES_X86_XOP;
16428 GemmMicrokernelTester()
16429 .mr(2)
16430 .nr(4)
16431 .kr(2)
16432 .sr(1)
16433 .m(2)
16434 .n(4)
16435 .k(8)
16436 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016437 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016438 }
16439#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16440
16441
16442#if XNN_ARCH_X86 || XNN_ARCH_X86_64
16443 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8) {
16444 TEST_REQUIRES_X86_XOP;
16445 GemmMicrokernelTester()
16446 .mr(3)
16447 .nr(4)
16448 .kr(2)
16449 .sr(1)
16450 .m(3)
16451 .n(4)
16452 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080016453 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016454 }
16455
16456 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cn) {
16457 TEST_REQUIRES_X86_XOP;
16458 GemmMicrokernelTester()
16459 .mr(3)
16460 .nr(4)
16461 .kr(2)
16462 .sr(1)
16463 .m(3)
16464 .n(4)
16465 .k(8)
16466 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016467 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016468 }
16469
16470 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_strided_a) {
16471 TEST_REQUIRES_X86_XOP;
16472 GemmMicrokernelTester()
16473 .mr(3)
16474 .nr(4)
16475 .kr(2)
16476 .sr(1)
16477 .m(3)
16478 .n(4)
16479 .k(8)
16480 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016481 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016482 }
16483
16484 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile) {
16485 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016486 for (uint32_t n = 1; n <= 4; n++) {
16487 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016488 GemmMicrokernelTester()
16489 .mr(3)
16490 .nr(4)
16491 .kr(2)
16492 .sr(1)
16493 .m(m)
16494 .n(n)
16495 .k(8)
16496 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016497 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016498 }
16499 }
16500 }
16501
16502 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile_m) {
16503 TEST_REQUIRES_X86_XOP;
16504 for (uint32_t m = 1; m <= 3; m++) {
16505 GemmMicrokernelTester()
16506 .mr(3)
16507 .nr(4)
16508 .kr(2)
16509 .sr(1)
16510 .m(m)
16511 .n(4)
16512 .k(8)
16513 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016514 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016515 }
16516 }
16517
16518 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile_n) {
16519 TEST_REQUIRES_X86_XOP;
16520 for (uint32_t n = 1; n <= 4; n++) {
16521 GemmMicrokernelTester()
16522 .mr(3)
16523 .nr(4)
16524 .kr(2)
16525 .sr(1)
16526 .m(3)
16527 .n(n)
16528 .k(8)
16529 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016530 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016531 }
16532 }
16533
16534 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8) {
16535 TEST_REQUIRES_X86_XOP;
16536 for (size_t k = 1; k < 8; k++) {
16537 GemmMicrokernelTester()
16538 .mr(3)
16539 .nr(4)
16540 .kr(2)
16541 .sr(1)
16542 .m(3)
16543 .n(4)
16544 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016545 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016546 }
16547 }
16548
16549 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8_strided_a) {
16550 TEST_REQUIRES_X86_XOP;
16551 for (size_t k = 1; k < 8; k++) {
16552 GemmMicrokernelTester()
16553 .mr(3)
16554 .nr(4)
16555 .kr(2)
16556 .sr(1)
16557 .m(3)
16558 .n(4)
16559 .k(k)
16560 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016561 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016562 }
16563 }
16564
16565 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8_subtile) {
16566 TEST_REQUIRES_X86_XOP;
16567 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016568 for (uint32_t n = 1; n <= 4; n++) {
16569 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016570 GemmMicrokernelTester()
16571 .mr(3)
16572 .nr(4)
16573 .kr(2)
16574 .sr(1)
16575 .m(m)
16576 .n(n)
16577 .k(k)
16578 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016579 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016580 }
16581 }
16582 }
16583 }
16584
16585 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8) {
16586 TEST_REQUIRES_X86_XOP;
16587 for (size_t k = 9; k < 16; k++) {
16588 GemmMicrokernelTester()
16589 .mr(3)
16590 .nr(4)
16591 .kr(2)
16592 .sr(1)
16593 .m(3)
16594 .n(4)
16595 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016596 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016597 }
16598 }
16599
16600 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8_strided_a) {
16601 TEST_REQUIRES_X86_XOP;
16602 for (size_t k = 9; k < 16; k++) {
16603 GemmMicrokernelTester()
16604 .mr(3)
16605 .nr(4)
16606 .kr(2)
16607 .sr(1)
16608 .m(3)
16609 .n(4)
16610 .k(k)
16611 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080016612 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016613 }
16614 }
16615
16616 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8_subtile) {
16617 TEST_REQUIRES_X86_XOP;
16618 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016619 for (uint32_t n = 1; n <= 4; n++) {
16620 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016621 GemmMicrokernelTester()
16622 .mr(3)
16623 .nr(4)
16624 .kr(2)
16625 .sr(1)
16626 .m(m)
16627 .n(n)
16628 .k(k)
16629 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016630 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016631 }
16632 }
16633 }
16634 }
16635
16636 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8) {
16637 TEST_REQUIRES_X86_XOP;
16638 for (size_t k = 16; k <= 80; k += 8) {
16639 GemmMicrokernelTester()
16640 .mr(3)
16641 .nr(4)
16642 .kr(2)
16643 .sr(1)
16644 .m(3)
16645 .n(4)
16646 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016647 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016648 }
16649 }
16650
16651 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8_strided_a) {
16652 TEST_REQUIRES_X86_XOP;
16653 for (size_t k = 16; k <= 80; k += 8) {
16654 GemmMicrokernelTester()
16655 .mr(3)
16656 .nr(4)
16657 .kr(2)
16658 .sr(1)
16659 .m(3)
16660 .n(4)
16661 .k(k)
16662 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080016663 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016664 }
16665 }
16666
16667 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8_subtile) {
16668 TEST_REQUIRES_X86_XOP;
16669 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016670 for (uint32_t n = 1; n <= 4; n++) {
16671 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016672 GemmMicrokernelTester()
16673 .mr(3)
16674 .nr(4)
16675 .kr(2)
16676 .sr(1)
16677 .m(m)
16678 .n(n)
16679 .k(k)
16680 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016681 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016682 }
16683 }
16684 }
16685 }
16686
16687 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4) {
16688 TEST_REQUIRES_X86_XOP;
16689 for (uint32_t n = 5; n < 8; n++) {
16690 for (size_t k = 1; k <= 40; k += 9) {
16691 GemmMicrokernelTester()
16692 .mr(3)
16693 .nr(4)
16694 .kr(2)
16695 .sr(1)
16696 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016697 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070016698 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016699 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016700 }
16701 }
16702 }
16703
16704 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_strided_cn) {
16705 TEST_REQUIRES_X86_XOP;
16706 for (uint32_t n = 5; n < 8; n++) {
16707 for (size_t k = 1; k <= 40; k += 9) {
16708 GemmMicrokernelTester()
16709 .mr(3)
16710 .nr(4)
16711 .kr(2)
16712 .sr(1)
16713 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016714 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070016715 .k(k)
16716 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016717 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016718 }
16719 }
16720 }
16721
16722 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_strided_a) {
16723 TEST_REQUIRES_X86_XOP;
16724 for (uint32_t n = 5; n < 8; n++) {
16725 for (size_t k = 1; k <= 40; k += 9) {
16726 GemmMicrokernelTester()
16727 .mr(3)
16728 .nr(4)
16729 .kr(2)
16730 .sr(1)
16731 .m(3)
16732 .n(n)
16733 .k(k)
16734 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080016735 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016736 }
16737 }
16738 }
16739
16740 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_subtile) {
16741 TEST_REQUIRES_X86_XOP;
16742 for (uint32_t n = 5; n < 8; n++) {
16743 for (size_t k = 1; k <= 40; k += 9) {
16744 for (uint32_t m = 1; m <= 3; m++) {
16745 GemmMicrokernelTester()
16746 .mr(3)
16747 .nr(4)
16748 .kr(2)
16749 .sr(1)
16750 .m(m)
16751 .n(n)
16752 .k(k)
16753 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016754 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016755 }
16756 }
16757 }
16758 }
16759
16760 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4) {
16761 TEST_REQUIRES_X86_XOP;
16762 for (uint32_t n = 8; n <= 12; n += 4) {
16763 for (size_t k = 1; k <= 40; k += 9) {
16764 GemmMicrokernelTester()
16765 .mr(3)
16766 .nr(4)
16767 .kr(2)
16768 .sr(1)
16769 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016770 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070016771 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016772 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016773 }
16774 }
16775 }
16776
16777 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_strided_cn) {
16778 TEST_REQUIRES_X86_XOP;
16779 for (uint32_t n = 8; n <= 12; n += 4) {
16780 for (size_t k = 1; k <= 40; k += 9) {
16781 GemmMicrokernelTester()
16782 .mr(3)
16783 .nr(4)
16784 .kr(2)
16785 .sr(1)
16786 .m(3)
16787 .n(n)
16788 .k(k)
16789 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016790 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016791 }
16792 }
16793 }
16794
16795 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_strided_a) {
16796 TEST_REQUIRES_X86_XOP;
16797 for (uint32_t n = 8; n <= 12; n += 4) {
16798 for (size_t k = 1; k <= 40; k += 9) {
16799 GemmMicrokernelTester()
16800 .mr(3)
16801 .nr(4)
16802 .kr(2)
16803 .sr(1)
16804 .m(3)
16805 .n(n)
16806 .k(k)
16807 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080016808 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016809 }
16810 }
16811 }
16812
16813 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_subtile) {
16814 TEST_REQUIRES_X86_XOP;
16815 for (uint32_t n = 8; n <= 12; n += 4) {
16816 for (size_t k = 1; k <= 40; k += 9) {
16817 for (uint32_t m = 1; m <= 3; m++) {
16818 GemmMicrokernelTester()
16819 .mr(3)
16820 .nr(4)
16821 .kr(2)
16822 .sr(1)
16823 .m(m)
16824 .n(n)
16825 .k(k)
16826 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016827 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016828 }
16829 }
16830 }
16831 }
16832
16833 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cm_subtile) {
16834 TEST_REQUIRES_X86_XOP;
16835 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016836 for (uint32_t n = 1; n <= 4; n++) {
16837 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016838 GemmMicrokernelTester()
16839 .mr(3)
16840 .nr(4)
16841 .kr(2)
16842 .sr(1)
16843 .m(m)
16844 .n(n)
16845 .k(k)
16846 .cm_stride(7)
16847 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016848 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016849 }
16850 }
16851 }
16852 }
16853
16854 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, qmin) {
16855 TEST_REQUIRES_X86_XOP;
16856 GemmMicrokernelTester()
16857 .mr(3)
16858 .nr(4)
16859 .kr(2)
16860 .sr(1)
16861 .m(3)
16862 .n(4)
16863 .k(8)
16864 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016865 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016866 }
16867
16868 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, qmax) {
16869 TEST_REQUIRES_X86_XOP;
16870 GemmMicrokernelTester()
16871 .mr(3)
16872 .nr(4)
16873 .kr(2)
16874 .sr(1)
16875 .m(3)
16876 .n(4)
16877 .k(8)
16878 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016879 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016880 }
16881
16882 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cm) {
16883 TEST_REQUIRES_X86_XOP;
16884 GemmMicrokernelTester()
16885 .mr(3)
16886 .nr(4)
16887 .kr(2)
16888 .sr(1)
16889 .m(3)
16890 .n(4)
16891 .k(8)
16892 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016893 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016894 }
16895#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16896
16897
16898#if XNN_ARCH_X86 || XNN_ARCH_X86_64
16899 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8) {
16900 TEST_REQUIRES_X86_XOP;
16901 GemmMicrokernelTester()
16902 .mr(4)
16903 .nr(4)
16904 .kr(2)
16905 .sr(1)
16906 .m(4)
16907 .n(4)
16908 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080016909 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016910 }
16911
16912 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cn) {
16913 TEST_REQUIRES_X86_XOP;
16914 GemmMicrokernelTester()
16915 .mr(4)
16916 .nr(4)
16917 .kr(2)
16918 .sr(1)
16919 .m(4)
16920 .n(4)
16921 .k(8)
16922 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016923 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016924 }
16925
16926 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_strided_a) {
16927 TEST_REQUIRES_X86_XOP;
16928 GemmMicrokernelTester()
16929 .mr(4)
16930 .nr(4)
16931 .kr(2)
16932 .sr(1)
16933 .m(4)
16934 .n(4)
16935 .k(8)
16936 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016937 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016938 }
16939
16940 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile) {
16941 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016942 for (uint32_t n = 1; n <= 4; n++) {
16943 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070016944 GemmMicrokernelTester()
16945 .mr(4)
16946 .nr(4)
16947 .kr(2)
16948 .sr(1)
16949 .m(m)
16950 .n(n)
16951 .k(8)
16952 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016953 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016954 }
16955 }
16956 }
16957
16958 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile_m) {
16959 TEST_REQUIRES_X86_XOP;
16960 for (uint32_t m = 1; m <= 4; m++) {
16961 GemmMicrokernelTester()
16962 .mr(4)
16963 .nr(4)
16964 .kr(2)
16965 .sr(1)
16966 .m(m)
16967 .n(4)
16968 .k(8)
16969 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016970 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016971 }
16972 }
16973
16974 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile_n) {
16975 TEST_REQUIRES_X86_XOP;
16976 for (uint32_t n = 1; n <= 4; n++) {
16977 GemmMicrokernelTester()
16978 .mr(4)
16979 .nr(4)
16980 .kr(2)
16981 .sr(1)
16982 .m(4)
16983 .n(n)
16984 .k(8)
16985 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016986 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070016987 }
16988 }
16989
16990 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8) {
16991 TEST_REQUIRES_X86_XOP;
16992 for (size_t k = 1; k < 8; k++) {
16993 GemmMicrokernelTester()
16994 .mr(4)
16995 .nr(4)
16996 .kr(2)
16997 .sr(1)
16998 .m(4)
16999 .n(4)
17000 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017001 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017002 }
17003 }
17004
17005 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8_strided_a) {
17006 TEST_REQUIRES_X86_XOP;
17007 for (size_t k = 1; k < 8; k++) {
17008 GemmMicrokernelTester()
17009 .mr(4)
17010 .nr(4)
17011 .kr(2)
17012 .sr(1)
17013 .m(4)
17014 .n(4)
17015 .k(k)
17016 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080017017 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017018 }
17019 }
17020
17021 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8_subtile) {
17022 TEST_REQUIRES_X86_XOP;
17023 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017024 for (uint32_t n = 1; n <= 4; n++) {
17025 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017026 GemmMicrokernelTester()
17027 .mr(4)
17028 .nr(4)
17029 .kr(2)
17030 .sr(1)
17031 .m(m)
17032 .n(n)
17033 .k(k)
17034 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017035 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017036 }
17037 }
17038 }
17039 }
17040
17041 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8) {
17042 TEST_REQUIRES_X86_XOP;
17043 for (size_t k = 9; k < 16; k++) {
17044 GemmMicrokernelTester()
17045 .mr(4)
17046 .nr(4)
17047 .kr(2)
17048 .sr(1)
17049 .m(4)
17050 .n(4)
17051 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017052 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017053 }
17054 }
17055
17056 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8_strided_a) {
17057 TEST_REQUIRES_X86_XOP;
17058 for (size_t k = 9; k < 16; k++) {
17059 GemmMicrokernelTester()
17060 .mr(4)
17061 .nr(4)
17062 .kr(2)
17063 .sr(1)
17064 .m(4)
17065 .n(4)
17066 .k(k)
17067 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080017068 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017069 }
17070 }
17071
17072 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8_subtile) {
17073 TEST_REQUIRES_X86_XOP;
17074 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017075 for (uint32_t n = 1; n <= 4; n++) {
17076 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017077 GemmMicrokernelTester()
17078 .mr(4)
17079 .nr(4)
17080 .kr(2)
17081 .sr(1)
17082 .m(m)
17083 .n(n)
17084 .k(k)
17085 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017086 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017087 }
17088 }
17089 }
17090 }
17091
17092 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8) {
17093 TEST_REQUIRES_X86_XOP;
17094 for (size_t k = 16; k <= 80; k += 8) {
17095 GemmMicrokernelTester()
17096 .mr(4)
17097 .nr(4)
17098 .kr(2)
17099 .sr(1)
17100 .m(4)
17101 .n(4)
17102 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017103 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017104 }
17105 }
17106
17107 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8_strided_a) {
17108 TEST_REQUIRES_X86_XOP;
17109 for (size_t k = 16; k <= 80; k += 8) {
17110 GemmMicrokernelTester()
17111 .mr(4)
17112 .nr(4)
17113 .kr(2)
17114 .sr(1)
17115 .m(4)
17116 .n(4)
17117 .k(k)
17118 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080017119 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017120 }
17121 }
17122
17123 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8_subtile) {
17124 TEST_REQUIRES_X86_XOP;
17125 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017126 for (uint32_t n = 1; n <= 4; n++) {
17127 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017128 GemmMicrokernelTester()
17129 .mr(4)
17130 .nr(4)
17131 .kr(2)
17132 .sr(1)
17133 .m(m)
17134 .n(n)
17135 .k(k)
17136 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017137 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017138 }
17139 }
17140 }
17141 }
17142
17143 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4) {
17144 TEST_REQUIRES_X86_XOP;
17145 for (uint32_t n = 5; n < 8; n++) {
17146 for (size_t k = 1; k <= 40; k += 9) {
17147 GemmMicrokernelTester()
17148 .mr(4)
17149 .nr(4)
17150 .kr(2)
17151 .sr(1)
17152 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017153 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070017154 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017155 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017156 }
17157 }
17158 }
17159
17160 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_strided_cn) {
17161 TEST_REQUIRES_X86_XOP;
17162 for (uint32_t n = 5; n < 8; n++) {
17163 for (size_t k = 1; k <= 40; k += 9) {
17164 GemmMicrokernelTester()
17165 .mr(4)
17166 .nr(4)
17167 .kr(2)
17168 .sr(1)
17169 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017170 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070017171 .k(k)
17172 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017173 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017174 }
17175 }
17176 }
17177
17178 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_strided_a) {
17179 TEST_REQUIRES_X86_XOP;
17180 for (uint32_t n = 5; n < 8; n++) {
17181 for (size_t k = 1; k <= 40; k += 9) {
17182 GemmMicrokernelTester()
17183 .mr(4)
17184 .nr(4)
17185 .kr(2)
17186 .sr(1)
17187 .m(4)
17188 .n(n)
17189 .k(k)
17190 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080017191 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017192 }
17193 }
17194 }
17195
17196 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_subtile) {
17197 TEST_REQUIRES_X86_XOP;
17198 for (uint32_t n = 5; n < 8; n++) {
17199 for (size_t k = 1; k <= 40; k += 9) {
17200 for (uint32_t m = 1; m <= 4; m++) {
17201 GemmMicrokernelTester()
17202 .mr(4)
17203 .nr(4)
17204 .kr(2)
17205 .sr(1)
17206 .m(m)
17207 .n(n)
17208 .k(k)
17209 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017210 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017211 }
17212 }
17213 }
17214 }
17215
17216 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4) {
17217 TEST_REQUIRES_X86_XOP;
17218 for (uint32_t n = 8; n <= 12; n += 4) {
17219 for (size_t k = 1; k <= 40; k += 9) {
17220 GemmMicrokernelTester()
17221 .mr(4)
17222 .nr(4)
17223 .kr(2)
17224 .sr(1)
17225 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017226 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070017227 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017228 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017229 }
17230 }
17231 }
17232
17233 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_strided_cn) {
17234 TEST_REQUIRES_X86_XOP;
17235 for (uint32_t n = 8; n <= 12; n += 4) {
17236 for (size_t k = 1; k <= 40; k += 9) {
17237 GemmMicrokernelTester()
17238 .mr(4)
17239 .nr(4)
17240 .kr(2)
17241 .sr(1)
17242 .m(4)
17243 .n(n)
17244 .k(k)
17245 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017246 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017247 }
17248 }
17249 }
17250
17251 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_strided_a) {
17252 TEST_REQUIRES_X86_XOP;
17253 for (uint32_t n = 8; n <= 12; n += 4) {
17254 for (size_t k = 1; k <= 40; k += 9) {
17255 GemmMicrokernelTester()
17256 .mr(4)
17257 .nr(4)
17258 .kr(2)
17259 .sr(1)
17260 .m(4)
17261 .n(n)
17262 .k(k)
17263 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080017264 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017265 }
17266 }
17267 }
17268
17269 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_subtile) {
17270 TEST_REQUIRES_X86_XOP;
17271 for (uint32_t n = 8; n <= 12; n += 4) {
17272 for (size_t k = 1; k <= 40; k += 9) {
17273 for (uint32_t m = 1; m <= 4; m++) {
17274 GemmMicrokernelTester()
17275 .mr(4)
17276 .nr(4)
17277 .kr(2)
17278 .sr(1)
17279 .m(m)
17280 .n(n)
17281 .k(k)
17282 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017283 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017284 }
17285 }
17286 }
17287 }
17288
17289 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cm_subtile) {
17290 TEST_REQUIRES_X86_XOP;
17291 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017292 for (uint32_t n = 1; n <= 4; n++) {
17293 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017294 GemmMicrokernelTester()
17295 .mr(4)
17296 .nr(4)
17297 .kr(2)
17298 .sr(1)
17299 .m(m)
17300 .n(n)
17301 .k(k)
17302 .cm_stride(7)
17303 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017304 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017305 }
17306 }
17307 }
17308 }
17309
17310 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, qmin) {
17311 TEST_REQUIRES_X86_XOP;
17312 GemmMicrokernelTester()
17313 .mr(4)
17314 .nr(4)
17315 .kr(2)
17316 .sr(1)
17317 .m(4)
17318 .n(4)
17319 .k(8)
17320 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017321 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017322 }
17323
17324 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, qmax) {
17325 TEST_REQUIRES_X86_XOP;
17326 GemmMicrokernelTester()
17327 .mr(4)
17328 .nr(4)
17329 .kr(2)
17330 .sr(1)
17331 .m(4)
17332 .n(4)
17333 .k(8)
17334 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017335 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017336 }
17337
17338 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cm) {
17339 TEST_REQUIRES_X86_XOP;
17340 GemmMicrokernelTester()
17341 .mr(4)
17342 .nr(4)
17343 .kr(2)
17344 .sr(1)
17345 .m(4)
17346 .n(4)
17347 .k(8)
17348 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017349 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017350 }
17351#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17352
17353
17354#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanc46e6712021-06-01 19:00:16 -070017355 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8) {
17356 TEST_REQUIRES_X86_SSE2;
17357 GemmMicrokernelTester()
17358 .mr(3)
17359 .nr(4)
17360 .kr(2)
17361 .sr(1)
17362 .m(3)
17363 .n(4)
17364 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080017365 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017366 }
17367
17368 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, strided_cn) {
17369 TEST_REQUIRES_X86_SSE2;
17370 GemmMicrokernelTester()
17371 .mr(3)
17372 .nr(4)
17373 .kr(2)
17374 .sr(1)
17375 .m(3)
17376 .n(4)
17377 .k(8)
17378 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017379 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017380 }
17381
17382 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_strided_a) {
17383 TEST_REQUIRES_X86_SSE2;
17384 GemmMicrokernelTester()
17385 .mr(3)
17386 .nr(4)
17387 .kr(2)
17388 .sr(1)
17389 .m(3)
17390 .n(4)
17391 .k(8)
17392 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080017393 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017394 }
17395
17396 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_subtile) {
17397 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017398 for (uint32_t n = 1; n <= 4; n++) {
17399 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017400 GemmMicrokernelTester()
17401 .mr(3)
17402 .nr(4)
17403 .kr(2)
17404 .sr(1)
17405 .m(m)
17406 .n(n)
17407 .k(8)
17408 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017409 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017410 }
17411 }
17412 }
17413
17414 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_subtile_m) {
17415 TEST_REQUIRES_X86_SSE2;
17416 for (uint32_t m = 1; m <= 3; m++) {
17417 GemmMicrokernelTester()
17418 .mr(3)
17419 .nr(4)
17420 .kr(2)
17421 .sr(1)
17422 .m(m)
17423 .n(4)
17424 .k(8)
17425 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017426 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017427 }
17428 }
17429
17430 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_subtile_n) {
17431 TEST_REQUIRES_X86_SSE2;
17432 for (uint32_t n = 1; n <= 4; n++) {
17433 GemmMicrokernelTester()
17434 .mr(3)
17435 .nr(4)
17436 .kr(2)
17437 .sr(1)
17438 .m(3)
17439 .n(n)
17440 .k(8)
17441 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017442 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017443 }
17444 }
17445
17446 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_lt_8) {
17447 TEST_REQUIRES_X86_SSE2;
17448 for (size_t k = 1; k < 8; k++) {
17449 GemmMicrokernelTester()
17450 .mr(3)
17451 .nr(4)
17452 .kr(2)
17453 .sr(1)
17454 .m(3)
17455 .n(4)
17456 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017457 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017458 }
17459 }
17460
17461 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_lt_8_strided_a) {
17462 TEST_REQUIRES_X86_SSE2;
17463 for (size_t k = 1; k < 8; k++) {
17464 GemmMicrokernelTester()
17465 .mr(3)
17466 .nr(4)
17467 .kr(2)
17468 .sr(1)
17469 .m(3)
17470 .n(4)
17471 .k(k)
17472 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080017473 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017474 }
17475 }
17476
17477 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_lt_8_subtile) {
17478 TEST_REQUIRES_X86_SSE2;
17479 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017480 for (uint32_t n = 1; n <= 4; n++) {
17481 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017482 GemmMicrokernelTester()
17483 .mr(3)
17484 .nr(4)
17485 .kr(2)
17486 .sr(1)
17487 .m(m)
17488 .n(n)
17489 .k(k)
17490 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017491 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017492 }
17493 }
17494 }
17495 }
17496
17497 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_gt_8) {
17498 TEST_REQUIRES_X86_SSE2;
17499 for (size_t k = 9; k < 16; k++) {
17500 GemmMicrokernelTester()
17501 .mr(3)
17502 .nr(4)
17503 .kr(2)
17504 .sr(1)
17505 .m(3)
17506 .n(4)
17507 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017508 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017509 }
17510 }
17511
17512 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_gt_8_strided_a) {
17513 TEST_REQUIRES_X86_SSE2;
17514 for (size_t k = 9; k < 16; k++) {
17515 GemmMicrokernelTester()
17516 .mr(3)
17517 .nr(4)
17518 .kr(2)
17519 .sr(1)
17520 .m(3)
17521 .n(4)
17522 .k(k)
17523 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080017524 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017525 }
17526 }
17527
17528 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_gt_8_subtile) {
17529 TEST_REQUIRES_X86_SSE2;
17530 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017531 for (uint32_t n = 1; n <= 4; n++) {
17532 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017533 GemmMicrokernelTester()
17534 .mr(3)
17535 .nr(4)
17536 .kr(2)
17537 .sr(1)
17538 .m(m)
17539 .n(n)
17540 .k(k)
17541 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017542 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017543 }
17544 }
17545 }
17546 }
17547
17548 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_div_8) {
17549 TEST_REQUIRES_X86_SSE2;
17550 for (size_t k = 16; k <= 80; k += 8) {
17551 GemmMicrokernelTester()
17552 .mr(3)
17553 .nr(4)
17554 .kr(2)
17555 .sr(1)
17556 .m(3)
17557 .n(4)
17558 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017559 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017560 }
17561 }
17562
17563 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_div_8_strided_a) {
17564 TEST_REQUIRES_X86_SSE2;
17565 for (size_t k = 16; k <= 80; k += 8) {
17566 GemmMicrokernelTester()
17567 .mr(3)
17568 .nr(4)
17569 .kr(2)
17570 .sr(1)
17571 .m(3)
17572 .n(4)
17573 .k(k)
17574 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080017575 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017576 }
17577 }
17578
17579 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_div_8_subtile) {
17580 TEST_REQUIRES_X86_SSE2;
17581 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017582 for (uint32_t n = 1; n <= 4; n++) {
17583 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017584 GemmMicrokernelTester()
17585 .mr(3)
17586 .nr(4)
17587 .kr(2)
17588 .sr(1)
17589 .m(m)
17590 .n(n)
17591 .k(k)
17592 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017593 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017594 }
17595 }
17596 }
17597 }
17598
17599 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4) {
17600 TEST_REQUIRES_X86_SSE2;
17601 for (uint32_t n = 5; n < 8; n++) {
17602 for (size_t k = 1; k <= 40; k += 9) {
17603 GemmMicrokernelTester()
17604 .mr(3)
17605 .nr(4)
17606 .kr(2)
17607 .sr(1)
17608 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017609 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070017610 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017611 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017612 }
17613 }
17614 }
17615
17616 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4_strided_cn) {
17617 TEST_REQUIRES_X86_SSE2;
17618 for (uint32_t n = 5; n < 8; n++) {
17619 for (size_t k = 1; k <= 40; k += 9) {
17620 GemmMicrokernelTester()
17621 .mr(3)
17622 .nr(4)
17623 .kr(2)
17624 .sr(1)
17625 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017626 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070017627 .k(k)
17628 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017629 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017630 }
17631 }
17632 }
17633
17634 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4_strided_a) {
17635 TEST_REQUIRES_X86_SSE2;
17636 for (uint32_t n = 5; n < 8; n++) {
17637 for (size_t k = 1; k <= 40; k += 9) {
17638 GemmMicrokernelTester()
17639 .mr(3)
17640 .nr(4)
17641 .kr(2)
17642 .sr(1)
17643 .m(3)
17644 .n(n)
17645 .k(k)
17646 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080017647 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017648 }
17649 }
17650 }
17651
17652 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4_subtile) {
17653 TEST_REQUIRES_X86_SSE2;
17654 for (uint32_t n = 5; n < 8; n++) {
17655 for (size_t k = 1; k <= 40; k += 9) {
17656 for (uint32_t m = 1; m <= 3; m++) {
17657 GemmMicrokernelTester()
17658 .mr(3)
17659 .nr(4)
17660 .kr(2)
17661 .sr(1)
17662 .m(m)
17663 .n(n)
17664 .k(k)
17665 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017666 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017667 }
17668 }
17669 }
17670 }
17671
17672 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4) {
17673 TEST_REQUIRES_X86_SSE2;
17674 for (uint32_t n = 8; n <= 12; n += 4) {
17675 for (size_t k = 1; k <= 40; k += 9) {
17676 GemmMicrokernelTester()
17677 .mr(3)
17678 .nr(4)
17679 .kr(2)
17680 .sr(1)
17681 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017682 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070017683 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017684 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017685 }
17686 }
17687 }
17688
17689 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4_strided_cn) {
17690 TEST_REQUIRES_X86_SSE2;
17691 for (uint32_t n = 8; n <= 12; n += 4) {
17692 for (size_t k = 1; k <= 40; k += 9) {
17693 GemmMicrokernelTester()
17694 .mr(3)
17695 .nr(4)
17696 .kr(2)
17697 .sr(1)
17698 .m(3)
17699 .n(n)
17700 .k(k)
17701 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017702 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017703 }
17704 }
17705 }
17706
17707 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4_strided_a) {
17708 TEST_REQUIRES_X86_SSE2;
17709 for (uint32_t n = 8; n <= 12; n += 4) {
17710 for (size_t k = 1; k <= 40; k += 9) {
17711 GemmMicrokernelTester()
17712 .mr(3)
17713 .nr(4)
17714 .kr(2)
17715 .sr(1)
17716 .m(3)
17717 .n(n)
17718 .k(k)
17719 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080017720 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017721 }
17722 }
17723 }
17724
17725 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4_subtile) {
17726 TEST_REQUIRES_X86_SSE2;
17727 for (uint32_t n = 8; n <= 12; n += 4) {
17728 for (size_t k = 1; k <= 40; k += 9) {
17729 for (uint32_t m = 1; m <= 3; m++) {
17730 GemmMicrokernelTester()
17731 .mr(3)
17732 .nr(4)
17733 .kr(2)
17734 .sr(1)
17735 .m(m)
17736 .n(n)
17737 .k(k)
17738 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017739 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017740 }
17741 }
17742 }
17743 }
17744
17745 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, strided_cm_subtile) {
17746 TEST_REQUIRES_X86_SSE2;
17747 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017748 for (uint32_t n = 1; n <= 4; n++) {
17749 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017750 GemmMicrokernelTester()
17751 .mr(3)
17752 .nr(4)
17753 .kr(2)
17754 .sr(1)
17755 .m(m)
17756 .n(n)
17757 .k(k)
17758 .cm_stride(7)
17759 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017760 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017761 }
17762 }
17763 }
17764 }
17765
17766 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, qmin) {
17767 TEST_REQUIRES_X86_SSE2;
17768 GemmMicrokernelTester()
17769 .mr(3)
17770 .nr(4)
17771 .kr(2)
17772 .sr(1)
17773 .m(3)
17774 .n(4)
17775 .k(8)
17776 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017777 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017778 }
17779
17780 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, qmax) {
17781 TEST_REQUIRES_X86_SSE2;
17782 GemmMicrokernelTester()
17783 .mr(3)
17784 .nr(4)
17785 .kr(2)
17786 .sr(1)
17787 .m(3)
17788 .n(4)
17789 .k(8)
17790 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017791 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017792 }
17793
17794 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, strided_cm) {
17795 TEST_REQUIRES_X86_SSE2;
17796 GemmMicrokernelTester()
17797 .mr(3)
17798 .nr(4)
17799 .kr(2)
17800 .sr(1)
17801 .m(3)
17802 .n(4)
17803 .k(8)
17804 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017805 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017806 }
17807#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17808
17809
17810#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanc46e6712021-06-01 19:00:16 -070017811 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8) {
17812 TEST_REQUIRES_X86_SSE41;
17813 GemmMicrokernelTester()
17814 .mr(3)
17815 .nr(4)
17816 .kr(2)
17817 .sr(1)
17818 .m(3)
17819 .n(4)
17820 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080017821 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017822 }
17823
17824 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cn) {
17825 TEST_REQUIRES_X86_SSE41;
17826 GemmMicrokernelTester()
17827 .mr(3)
17828 .nr(4)
17829 .kr(2)
17830 .sr(1)
17831 .m(3)
17832 .n(4)
17833 .k(8)
17834 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017835 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017836 }
17837
17838 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_strided_a) {
17839 TEST_REQUIRES_X86_SSE41;
17840 GemmMicrokernelTester()
17841 .mr(3)
17842 .nr(4)
17843 .kr(2)
17844 .sr(1)
17845 .m(3)
17846 .n(4)
17847 .k(8)
17848 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080017849 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017850 }
17851
17852 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile) {
17853 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017854 for (uint32_t n = 1; n <= 4; n++) {
17855 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017856 GemmMicrokernelTester()
17857 .mr(3)
17858 .nr(4)
17859 .kr(2)
17860 .sr(1)
17861 .m(m)
17862 .n(n)
17863 .k(8)
17864 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017865 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017866 }
17867 }
17868 }
17869
17870 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile_m) {
17871 TEST_REQUIRES_X86_SSE41;
17872 for (uint32_t m = 1; m <= 3; m++) {
17873 GemmMicrokernelTester()
17874 .mr(3)
17875 .nr(4)
17876 .kr(2)
17877 .sr(1)
17878 .m(m)
17879 .n(4)
17880 .k(8)
17881 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017882 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017883 }
17884 }
17885
17886 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile_n) {
17887 TEST_REQUIRES_X86_SSE41;
17888 for (uint32_t n = 1; n <= 4; n++) {
17889 GemmMicrokernelTester()
17890 .mr(3)
17891 .nr(4)
17892 .kr(2)
17893 .sr(1)
17894 .m(3)
17895 .n(n)
17896 .k(8)
17897 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017898 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017899 }
17900 }
17901
17902 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_lt_8) {
17903 TEST_REQUIRES_X86_SSE41;
17904 for (size_t k = 1; k < 8; k++) {
17905 GemmMicrokernelTester()
17906 .mr(3)
17907 .nr(4)
17908 .kr(2)
17909 .sr(1)
17910 .m(3)
17911 .n(4)
17912 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017913 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017914 }
17915 }
17916
17917 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_lt_8_strided_a) {
17918 TEST_REQUIRES_X86_SSE41;
17919 for (size_t k = 1; k < 8; k++) {
17920 GemmMicrokernelTester()
17921 .mr(3)
17922 .nr(4)
17923 .kr(2)
17924 .sr(1)
17925 .m(3)
17926 .n(4)
17927 .k(k)
17928 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080017929 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017930 }
17931 }
17932
17933 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_lt_8_subtile) {
17934 TEST_REQUIRES_X86_SSE41;
17935 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017936 for (uint32_t n = 1; n <= 4; n++) {
17937 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017938 GemmMicrokernelTester()
17939 .mr(3)
17940 .nr(4)
17941 .kr(2)
17942 .sr(1)
17943 .m(m)
17944 .n(n)
17945 .k(k)
17946 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017947 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017948 }
17949 }
17950 }
17951 }
17952
17953 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_gt_8) {
17954 TEST_REQUIRES_X86_SSE41;
17955 for (size_t k = 9; k < 16; k++) {
17956 GemmMicrokernelTester()
17957 .mr(3)
17958 .nr(4)
17959 .kr(2)
17960 .sr(1)
17961 .m(3)
17962 .n(4)
17963 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017964 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017965 }
17966 }
17967
17968 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_gt_8_strided_a) {
17969 TEST_REQUIRES_X86_SSE41;
17970 for (size_t k = 9; k < 16; k++) {
17971 GemmMicrokernelTester()
17972 .mr(3)
17973 .nr(4)
17974 .kr(2)
17975 .sr(1)
17976 .m(3)
17977 .n(4)
17978 .k(k)
17979 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080017980 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017981 }
17982 }
17983
17984 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_gt_8_subtile) {
17985 TEST_REQUIRES_X86_SSE41;
17986 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017987 for (uint32_t n = 1; n <= 4; n++) {
17988 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070017989 GemmMicrokernelTester()
17990 .mr(3)
17991 .nr(4)
17992 .kr(2)
17993 .sr(1)
17994 .m(m)
17995 .n(n)
17996 .k(k)
17997 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017998 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070017999 }
18000 }
18001 }
18002 }
18003
18004 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_div_8) {
18005 TEST_REQUIRES_X86_SSE41;
18006 for (size_t k = 16; k <= 80; k += 8) {
18007 GemmMicrokernelTester()
18008 .mr(3)
18009 .nr(4)
18010 .kr(2)
18011 .sr(1)
18012 .m(3)
18013 .n(4)
18014 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018015 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018016 }
18017 }
18018
18019 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_div_8_strided_a) {
18020 TEST_REQUIRES_X86_SSE41;
18021 for (size_t k = 16; k <= 80; k += 8) {
18022 GemmMicrokernelTester()
18023 .mr(3)
18024 .nr(4)
18025 .kr(2)
18026 .sr(1)
18027 .m(3)
18028 .n(4)
18029 .k(k)
18030 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080018031 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018032 }
18033 }
18034
18035 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_div_8_subtile) {
18036 TEST_REQUIRES_X86_SSE41;
18037 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018038 for (uint32_t n = 1; n <= 4; n++) {
18039 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018040 GemmMicrokernelTester()
18041 .mr(3)
18042 .nr(4)
18043 .kr(2)
18044 .sr(1)
18045 .m(m)
18046 .n(n)
18047 .k(k)
18048 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018049 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018050 }
18051 }
18052 }
18053 }
18054
18055 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4) {
18056 TEST_REQUIRES_X86_SSE41;
18057 for (uint32_t n = 5; n < 8; n++) {
18058 for (size_t k = 1; k <= 40; k += 9) {
18059 GemmMicrokernelTester()
18060 .mr(3)
18061 .nr(4)
18062 .kr(2)
18063 .sr(1)
18064 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018065 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070018066 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018067 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018068 }
18069 }
18070 }
18071
18072 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_strided_cn) {
18073 TEST_REQUIRES_X86_SSE41;
18074 for (uint32_t n = 5; n < 8; n++) {
18075 for (size_t k = 1; k <= 40; k += 9) {
18076 GemmMicrokernelTester()
18077 .mr(3)
18078 .nr(4)
18079 .kr(2)
18080 .sr(1)
18081 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018082 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070018083 .k(k)
18084 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080018085 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018086 }
18087 }
18088 }
18089
18090 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_strided_a) {
18091 TEST_REQUIRES_X86_SSE41;
18092 for (uint32_t n = 5; n < 8; n++) {
18093 for (size_t k = 1; k <= 40; k += 9) {
18094 GemmMicrokernelTester()
18095 .mr(3)
18096 .nr(4)
18097 .kr(2)
18098 .sr(1)
18099 .m(3)
18100 .n(n)
18101 .k(k)
18102 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080018103 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018104 }
18105 }
18106 }
18107
18108 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_subtile) {
18109 TEST_REQUIRES_X86_SSE41;
18110 for (uint32_t n = 5; n < 8; n++) {
18111 for (size_t k = 1; k <= 40; k += 9) {
18112 for (uint32_t m = 1; m <= 3; m++) {
18113 GemmMicrokernelTester()
18114 .mr(3)
18115 .nr(4)
18116 .kr(2)
18117 .sr(1)
18118 .m(m)
18119 .n(n)
18120 .k(k)
18121 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018122 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018123 }
18124 }
18125 }
18126 }
18127
18128 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4) {
18129 TEST_REQUIRES_X86_SSE41;
18130 for (uint32_t n = 8; n <= 12; n += 4) {
18131 for (size_t k = 1; k <= 40; k += 9) {
18132 GemmMicrokernelTester()
18133 .mr(3)
18134 .nr(4)
18135 .kr(2)
18136 .sr(1)
18137 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018138 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070018139 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018140 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018141 }
18142 }
18143 }
18144
18145 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_strided_cn) {
18146 TEST_REQUIRES_X86_SSE41;
18147 for (uint32_t n = 8; n <= 12; n += 4) {
18148 for (size_t k = 1; k <= 40; k += 9) {
18149 GemmMicrokernelTester()
18150 .mr(3)
18151 .nr(4)
18152 .kr(2)
18153 .sr(1)
18154 .m(3)
18155 .n(n)
18156 .k(k)
18157 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080018158 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018159 }
18160 }
18161 }
18162
18163 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_strided_a) {
18164 TEST_REQUIRES_X86_SSE41;
18165 for (uint32_t n = 8; n <= 12; n += 4) {
18166 for (size_t k = 1; k <= 40; k += 9) {
18167 GemmMicrokernelTester()
18168 .mr(3)
18169 .nr(4)
18170 .kr(2)
18171 .sr(1)
18172 .m(3)
18173 .n(n)
18174 .k(k)
18175 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080018176 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018177 }
18178 }
18179 }
18180
18181 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_subtile) {
18182 TEST_REQUIRES_X86_SSE41;
18183 for (uint32_t n = 8; n <= 12; n += 4) {
18184 for (size_t k = 1; k <= 40; k += 9) {
18185 for (uint32_t m = 1; m <= 3; m++) {
18186 GemmMicrokernelTester()
18187 .mr(3)
18188 .nr(4)
18189 .kr(2)
18190 .sr(1)
18191 .m(m)
18192 .n(n)
18193 .k(k)
18194 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018195 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018196 }
18197 }
18198 }
18199 }
18200
18201 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cm_subtile) {
18202 TEST_REQUIRES_X86_SSE41;
18203 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018204 for (uint32_t n = 1; n <= 4; n++) {
18205 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018206 GemmMicrokernelTester()
18207 .mr(3)
18208 .nr(4)
18209 .kr(2)
18210 .sr(1)
18211 .m(m)
18212 .n(n)
18213 .k(k)
18214 .cm_stride(7)
18215 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018216 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018217 }
18218 }
18219 }
18220 }
18221
18222 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, qmin) {
18223 TEST_REQUIRES_X86_SSE41;
18224 GemmMicrokernelTester()
18225 .mr(3)
18226 .nr(4)
18227 .kr(2)
18228 .sr(1)
18229 .m(3)
18230 .n(4)
18231 .k(8)
18232 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018233 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018234 }
18235
18236 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, qmax) {
18237 TEST_REQUIRES_X86_SSE41;
18238 GemmMicrokernelTester()
18239 .mr(3)
18240 .nr(4)
18241 .kr(2)
18242 .sr(1)
18243 .m(3)
18244 .n(4)
18245 .k(8)
18246 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018247 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018248 }
18249
18250 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cm) {
18251 TEST_REQUIRES_X86_SSE41;
18252 GemmMicrokernelTester()
18253 .mr(3)
18254 .nr(4)
18255 .kr(2)
18256 .sr(1)
18257 .m(3)
18258 .n(4)
18259 .k(8)
18260 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080018261 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018262 }
18263#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18264
18265
18266#if XNN_ARCH_X86 || XNN_ARCH_X86_64
18267 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8) {
18268 TEST_REQUIRES_X86_SSE41;
18269 GemmMicrokernelTester()
18270 .mr(4)
18271 .nr(4)
18272 .kr(2)
18273 .sr(1)
18274 .m(4)
18275 .n(4)
18276 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080018277 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018278 }
18279
18280 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cn) {
18281 TEST_REQUIRES_X86_SSE41;
18282 GemmMicrokernelTester()
18283 .mr(4)
18284 .nr(4)
18285 .kr(2)
18286 .sr(1)
18287 .m(4)
18288 .n(4)
18289 .k(8)
18290 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080018291 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018292 }
18293
18294 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_strided_a) {
18295 TEST_REQUIRES_X86_SSE41;
18296 GemmMicrokernelTester()
18297 .mr(4)
18298 .nr(4)
18299 .kr(2)
18300 .sr(1)
18301 .m(4)
18302 .n(4)
18303 .k(8)
18304 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080018305 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018306 }
18307
18308 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile) {
18309 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018310 for (uint32_t n = 1; n <= 4; n++) {
18311 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018312 GemmMicrokernelTester()
18313 .mr(4)
18314 .nr(4)
18315 .kr(2)
18316 .sr(1)
18317 .m(m)
18318 .n(n)
18319 .k(8)
18320 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018321 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018322 }
18323 }
18324 }
18325
18326 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile_m) {
18327 TEST_REQUIRES_X86_SSE41;
18328 for (uint32_t m = 1; m <= 4; m++) {
18329 GemmMicrokernelTester()
18330 .mr(4)
18331 .nr(4)
18332 .kr(2)
18333 .sr(1)
18334 .m(m)
18335 .n(4)
18336 .k(8)
18337 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018338 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018339 }
18340 }
18341
18342 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile_n) {
18343 TEST_REQUIRES_X86_SSE41;
18344 for (uint32_t n = 1; n <= 4; n++) {
18345 GemmMicrokernelTester()
18346 .mr(4)
18347 .nr(4)
18348 .kr(2)
18349 .sr(1)
18350 .m(4)
18351 .n(n)
18352 .k(8)
18353 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018354 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018355 }
18356 }
18357
18358 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_lt_8) {
18359 TEST_REQUIRES_X86_SSE41;
18360 for (size_t k = 1; k < 8; k++) {
18361 GemmMicrokernelTester()
18362 .mr(4)
18363 .nr(4)
18364 .kr(2)
18365 .sr(1)
18366 .m(4)
18367 .n(4)
18368 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018369 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018370 }
18371 }
18372
18373 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_lt_8_strided_a) {
18374 TEST_REQUIRES_X86_SSE41;
18375 for (size_t k = 1; k < 8; k++) {
18376 GemmMicrokernelTester()
18377 .mr(4)
18378 .nr(4)
18379 .kr(2)
18380 .sr(1)
18381 .m(4)
18382 .n(4)
18383 .k(k)
18384 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080018385 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018386 }
18387 }
18388
18389 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_lt_8_subtile) {
18390 TEST_REQUIRES_X86_SSE41;
18391 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018392 for (uint32_t n = 1; n <= 4; n++) {
18393 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018394 GemmMicrokernelTester()
18395 .mr(4)
18396 .nr(4)
18397 .kr(2)
18398 .sr(1)
18399 .m(m)
18400 .n(n)
18401 .k(k)
18402 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018403 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018404 }
18405 }
18406 }
18407 }
18408
18409 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_gt_8) {
18410 TEST_REQUIRES_X86_SSE41;
18411 for (size_t k = 9; k < 16; k++) {
18412 GemmMicrokernelTester()
18413 .mr(4)
18414 .nr(4)
18415 .kr(2)
18416 .sr(1)
18417 .m(4)
18418 .n(4)
18419 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018420 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018421 }
18422 }
18423
18424 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_gt_8_strided_a) {
18425 TEST_REQUIRES_X86_SSE41;
18426 for (size_t k = 9; k < 16; k++) {
18427 GemmMicrokernelTester()
18428 .mr(4)
18429 .nr(4)
18430 .kr(2)
18431 .sr(1)
18432 .m(4)
18433 .n(4)
18434 .k(k)
18435 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018436 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018437 }
18438 }
18439
18440 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_gt_8_subtile) {
18441 TEST_REQUIRES_X86_SSE41;
18442 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018443 for (uint32_t n = 1; n <= 4; n++) {
18444 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018445 GemmMicrokernelTester()
18446 .mr(4)
18447 .nr(4)
18448 .kr(2)
18449 .sr(1)
18450 .m(m)
18451 .n(n)
18452 .k(k)
18453 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018454 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018455 }
18456 }
18457 }
18458 }
18459
18460 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_div_8) {
18461 TEST_REQUIRES_X86_SSE41;
18462 for (size_t k = 16; k <= 80; k += 8) {
18463 GemmMicrokernelTester()
18464 .mr(4)
18465 .nr(4)
18466 .kr(2)
18467 .sr(1)
18468 .m(4)
18469 .n(4)
18470 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018471 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018472 }
18473 }
18474
18475 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_div_8_strided_a) {
18476 TEST_REQUIRES_X86_SSE41;
18477 for (size_t k = 16; k <= 80; k += 8) {
18478 GemmMicrokernelTester()
18479 .mr(4)
18480 .nr(4)
18481 .kr(2)
18482 .sr(1)
18483 .m(4)
18484 .n(4)
18485 .k(k)
18486 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080018487 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018488 }
18489 }
18490
18491 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_div_8_subtile) {
18492 TEST_REQUIRES_X86_SSE41;
18493 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018494 for (uint32_t n = 1; n <= 4; n++) {
18495 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018496 GemmMicrokernelTester()
18497 .mr(4)
18498 .nr(4)
18499 .kr(2)
18500 .sr(1)
18501 .m(m)
18502 .n(n)
18503 .k(k)
18504 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018505 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018506 }
18507 }
18508 }
18509 }
18510
18511 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4) {
18512 TEST_REQUIRES_X86_SSE41;
18513 for (uint32_t n = 5; n < 8; n++) {
18514 for (size_t k = 1; k <= 40; k += 9) {
18515 GemmMicrokernelTester()
18516 .mr(4)
18517 .nr(4)
18518 .kr(2)
18519 .sr(1)
18520 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018521 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070018522 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018523 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018524 }
18525 }
18526 }
18527
18528 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_strided_cn) {
18529 TEST_REQUIRES_X86_SSE41;
18530 for (uint32_t n = 5; n < 8; n++) {
18531 for (size_t k = 1; k <= 40; k += 9) {
18532 GemmMicrokernelTester()
18533 .mr(4)
18534 .nr(4)
18535 .kr(2)
18536 .sr(1)
18537 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018538 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070018539 .k(k)
18540 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080018541 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018542 }
18543 }
18544 }
18545
18546 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_strided_a) {
18547 TEST_REQUIRES_X86_SSE41;
18548 for (uint32_t n = 5; n < 8; n++) {
18549 for (size_t k = 1; k <= 40; k += 9) {
18550 GemmMicrokernelTester()
18551 .mr(4)
18552 .nr(4)
18553 .kr(2)
18554 .sr(1)
18555 .m(4)
18556 .n(n)
18557 .k(k)
18558 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080018559 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018560 }
18561 }
18562 }
18563
18564 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_subtile) {
18565 TEST_REQUIRES_X86_SSE41;
18566 for (uint32_t n = 5; n < 8; n++) {
18567 for (size_t k = 1; k <= 40; k += 9) {
18568 for (uint32_t m = 1; m <= 4; m++) {
18569 GemmMicrokernelTester()
18570 .mr(4)
18571 .nr(4)
18572 .kr(2)
18573 .sr(1)
18574 .m(m)
18575 .n(n)
18576 .k(k)
18577 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018578 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018579 }
18580 }
18581 }
18582 }
18583
18584 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4) {
18585 TEST_REQUIRES_X86_SSE41;
18586 for (uint32_t n = 8; n <= 12; n += 4) {
18587 for (size_t k = 1; k <= 40; k += 9) {
18588 GemmMicrokernelTester()
18589 .mr(4)
18590 .nr(4)
18591 .kr(2)
18592 .sr(1)
18593 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018594 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070018595 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018596 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018597 }
18598 }
18599 }
18600
18601 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_strided_cn) {
18602 TEST_REQUIRES_X86_SSE41;
18603 for (uint32_t n = 8; n <= 12; n += 4) {
18604 for (size_t k = 1; k <= 40; k += 9) {
18605 GemmMicrokernelTester()
18606 .mr(4)
18607 .nr(4)
18608 .kr(2)
18609 .sr(1)
18610 .m(4)
18611 .n(n)
18612 .k(k)
18613 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080018614 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018615 }
18616 }
18617 }
18618
18619 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_strided_a) {
18620 TEST_REQUIRES_X86_SSE41;
18621 for (uint32_t n = 8; n <= 12; n += 4) {
18622 for (size_t k = 1; k <= 40; k += 9) {
18623 GemmMicrokernelTester()
18624 .mr(4)
18625 .nr(4)
18626 .kr(2)
18627 .sr(1)
18628 .m(4)
18629 .n(n)
18630 .k(k)
18631 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080018632 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018633 }
18634 }
18635 }
18636
18637 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_subtile) {
18638 TEST_REQUIRES_X86_SSE41;
18639 for (uint32_t n = 8; n <= 12; n += 4) {
18640 for (size_t k = 1; k <= 40; k += 9) {
18641 for (uint32_t m = 1; m <= 4; m++) {
18642 GemmMicrokernelTester()
18643 .mr(4)
18644 .nr(4)
18645 .kr(2)
18646 .sr(1)
18647 .m(m)
18648 .n(n)
18649 .k(k)
18650 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018651 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018652 }
18653 }
18654 }
18655 }
18656
18657 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cm_subtile) {
18658 TEST_REQUIRES_X86_SSE41;
18659 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018660 for (uint32_t n = 1; n <= 4; n++) {
18661 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018662 GemmMicrokernelTester()
18663 .mr(4)
18664 .nr(4)
18665 .kr(2)
18666 .sr(1)
18667 .m(m)
18668 .n(n)
18669 .k(k)
18670 .cm_stride(7)
18671 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018672 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018673 }
18674 }
18675 }
18676 }
18677
18678 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, qmin) {
18679 TEST_REQUIRES_X86_SSE41;
18680 GemmMicrokernelTester()
18681 .mr(4)
18682 .nr(4)
18683 .kr(2)
18684 .sr(1)
18685 .m(4)
18686 .n(4)
18687 .k(8)
18688 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018689 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018690 }
18691
18692 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, qmax) {
18693 TEST_REQUIRES_X86_SSE41;
18694 GemmMicrokernelTester()
18695 .mr(4)
18696 .nr(4)
18697 .kr(2)
18698 .sr(1)
18699 .m(4)
18700 .n(4)
18701 .k(8)
18702 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018703 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018704 }
18705
18706 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cm) {
18707 TEST_REQUIRES_X86_SSE41;
18708 GemmMicrokernelTester()
18709 .mr(4)
18710 .nr(4)
18711 .kr(2)
18712 .sr(1)
18713 .m(4)
18714 .n(4)
18715 .k(8)
18716 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080018717 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018718 }
18719#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18720
18721
18722#if XNN_ARCH_X86 || XNN_ARCH_X86_64
18723 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8) {
18724 TEST_REQUIRES_X86_AVX;
18725 GemmMicrokernelTester()
18726 .mr(1)
18727 .nr(4)
18728 .kr(2)
18729 .sr(1)
18730 .m(1)
18731 .n(4)
18732 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080018733 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018734 }
18735
18736 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, strided_cn) {
18737 TEST_REQUIRES_X86_AVX;
18738 GemmMicrokernelTester()
18739 .mr(1)
18740 .nr(4)
18741 .kr(2)
18742 .sr(1)
18743 .m(1)
18744 .n(4)
18745 .k(8)
18746 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080018747 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018748 }
18749
18750 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8_strided_a) {
18751 TEST_REQUIRES_X86_AVX;
18752 GemmMicrokernelTester()
18753 .mr(1)
18754 .nr(4)
18755 .kr(2)
18756 .sr(1)
18757 .m(1)
18758 .n(4)
18759 .k(8)
18760 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080018761 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018762 }
18763
18764 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8_subtile) {
18765 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018766 for (uint32_t n = 1; n <= 4; n++) {
18767 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018768 GemmMicrokernelTester()
18769 .mr(1)
18770 .nr(4)
18771 .kr(2)
18772 .sr(1)
18773 .m(m)
18774 .n(n)
18775 .k(8)
18776 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018777 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018778 }
18779 }
18780 }
18781
18782 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8_subtile_m) {
18783 TEST_REQUIRES_X86_AVX;
18784 for (uint32_t m = 1; m <= 1; m++) {
18785 GemmMicrokernelTester()
18786 .mr(1)
18787 .nr(4)
18788 .kr(2)
18789 .sr(1)
18790 .m(m)
18791 .n(4)
18792 .k(8)
18793 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018794 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018795 }
18796 }
18797
18798 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8_subtile_n) {
18799 TEST_REQUIRES_X86_AVX;
18800 for (uint32_t n = 1; n <= 4; n++) {
18801 GemmMicrokernelTester()
18802 .mr(1)
18803 .nr(4)
18804 .kr(2)
18805 .sr(1)
18806 .m(1)
18807 .n(n)
18808 .k(8)
18809 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018810 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018811 }
18812 }
18813
18814 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_lt_8) {
18815 TEST_REQUIRES_X86_AVX;
18816 for (size_t k = 1; k < 8; k++) {
18817 GemmMicrokernelTester()
18818 .mr(1)
18819 .nr(4)
18820 .kr(2)
18821 .sr(1)
18822 .m(1)
18823 .n(4)
18824 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018825 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018826 }
18827 }
18828
18829 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_lt_8_strided_a) {
18830 TEST_REQUIRES_X86_AVX;
18831 for (size_t k = 1; k < 8; k++) {
18832 GemmMicrokernelTester()
18833 .mr(1)
18834 .nr(4)
18835 .kr(2)
18836 .sr(1)
18837 .m(1)
18838 .n(4)
18839 .k(k)
18840 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080018841 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018842 }
18843 }
18844
18845 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_lt_8_subtile) {
18846 TEST_REQUIRES_X86_AVX;
18847 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018848 for (uint32_t n = 1; n <= 4; n++) {
18849 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018850 GemmMicrokernelTester()
18851 .mr(1)
18852 .nr(4)
18853 .kr(2)
18854 .sr(1)
18855 .m(m)
18856 .n(n)
18857 .k(k)
18858 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018859 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018860 }
18861 }
18862 }
18863 }
18864
18865 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_gt_8) {
18866 TEST_REQUIRES_X86_AVX;
18867 for (size_t k = 9; k < 16; k++) {
18868 GemmMicrokernelTester()
18869 .mr(1)
18870 .nr(4)
18871 .kr(2)
18872 .sr(1)
18873 .m(1)
18874 .n(4)
18875 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018876 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018877 }
18878 }
18879
18880 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_gt_8_strided_a) {
18881 TEST_REQUIRES_X86_AVX;
18882 for (size_t k = 9; k < 16; k++) {
18883 GemmMicrokernelTester()
18884 .mr(1)
18885 .nr(4)
18886 .kr(2)
18887 .sr(1)
18888 .m(1)
18889 .n(4)
18890 .k(k)
18891 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018892 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018893 }
18894 }
18895
18896 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_gt_8_subtile) {
18897 TEST_REQUIRES_X86_AVX;
18898 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018899 for (uint32_t n = 1; n <= 4; n++) {
18900 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018901 GemmMicrokernelTester()
18902 .mr(1)
18903 .nr(4)
18904 .kr(2)
18905 .sr(1)
18906 .m(m)
18907 .n(n)
18908 .k(k)
18909 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018910 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018911 }
18912 }
18913 }
18914 }
18915
18916 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_div_8) {
18917 TEST_REQUIRES_X86_AVX;
18918 for (size_t k = 16; k <= 80; k += 8) {
18919 GemmMicrokernelTester()
18920 .mr(1)
18921 .nr(4)
18922 .kr(2)
18923 .sr(1)
18924 .m(1)
18925 .n(4)
18926 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018927 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018928 }
18929 }
18930
18931 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_div_8_strided_a) {
18932 TEST_REQUIRES_X86_AVX;
18933 for (size_t k = 16; k <= 80; k += 8) {
18934 GemmMicrokernelTester()
18935 .mr(1)
18936 .nr(4)
18937 .kr(2)
18938 .sr(1)
18939 .m(1)
18940 .n(4)
18941 .k(k)
18942 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080018943 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018944 }
18945 }
18946
18947 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_div_8_subtile) {
18948 TEST_REQUIRES_X86_AVX;
18949 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018950 for (uint32_t n = 1; n <= 4; n++) {
18951 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070018952 GemmMicrokernelTester()
18953 .mr(1)
18954 .nr(4)
18955 .kr(2)
18956 .sr(1)
18957 .m(m)
18958 .n(n)
18959 .k(k)
18960 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018961 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018962 }
18963 }
18964 }
18965 }
18966
18967 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4) {
18968 TEST_REQUIRES_X86_AVX;
18969 for (uint32_t n = 5; n < 8; n++) {
18970 for (size_t k = 1; k <= 40; k += 9) {
18971 GemmMicrokernelTester()
18972 .mr(1)
18973 .nr(4)
18974 .kr(2)
18975 .sr(1)
18976 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018977 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070018978 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018979 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018980 }
18981 }
18982 }
18983
18984 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4_strided_cn) {
18985 TEST_REQUIRES_X86_AVX;
18986 for (uint32_t n = 5; n < 8; n++) {
18987 for (size_t k = 1; k <= 40; k += 9) {
18988 GemmMicrokernelTester()
18989 .mr(1)
18990 .nr(4)
18991 .kr(2)
18992 .sr(1)
18993 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018994 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070018995 .k(k)
18996 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080018997 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070018998 }
18999 }
19000 }
19001
19002 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4_strided_a) {
19003 TEST_REQUIRES_X86_AVX;
19004 for (uint32_t n = 5; n < 8; n++) {
19005 for (size_t k = 1; k <= 40; k += 9) {
19006 GemmMicrokernelTester()
19007 .mr(1)
19008 .nr(4)
19009 .kr(2)
19010 .sr(1)
19011 .m(1)
19012 .n(n)
19013 .k(k)
19014 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080019015 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019016 }
19017 }
19018 }
19019
19020 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4_subtile) {
19021 TEST_REQUIRES_X86_AVX;
19022 for (uint32_t n = 5; n < 8; n++) {
19023 for (size_t k = 1; k <= 40; k += 9) {
19024 for (uint32_t m = 1; m <= 1; m++) {
19025 GemmMicrokernelTester()
19026 .mr(1)
19027 .nr(4)
19028 .kr(2)
19029 .sr(1)
19030 .m(m)
19031 .n(n)
19032 .k(k)
19033 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019034 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019035 }
19036 }
19037 }
19038 }
19039
19040 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4) {
19041 TEST_REQUIRES_X86_AVX;
19042 for (uint32_t n = 8; n <= 12; n += 4) {
19043 for (size_t k = 1; k <= 40; k += 9) {
19044 GemmMicrokernelTester()
19045 .mr(1)
19046 .nr(4)
19047 .kr(2)
19048 .sr(1)
19049 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019050 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070019051 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019052 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019053 }
19054 }
19055 }
19056
19057 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4_strided_cn) {
19058 TEST_REQUIRES_X86_AVX;
19059 for (uint32_t n = 8; n <= 12; n += 4) {
19060 for (size_t k = 1; k <= 40; k += 9) {
19061 GemmMicrokernelTester()
19062 .mr(1)
19063 .nr(4)
19064 .kr(2)
19065 .sr(1)
19066 .m(1)
19067 .n(n)
19068 .k(k)
19069 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080019070 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019071 }
19072 }
19073 }
19074
19075 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4_strided_a) {
19076 TEST_REQUIRES_X86_AVX;
19077 for (uint32_t n = 8; n <= 12; n += 4) {
19078 for (size_t k = 1; k <= 40; k += 9) {
19079 GemmMicrokernelTester()
19080 .mr(1)
19081 .nr(4)
19082 .kr(2)
19083 .sr(1)
19084 .m(1)
19085 .n(n)
19086 .k(k)
19087 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080019088 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019089 }
19090 }
19091 }
19092
19093 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4_subtile) {
19094 TEST_REQUIRES_X86_AVX;
19095 for (uint32_t n = 8; n <= 12; n += 4) {
19096 for (size_t k = 1; k <= 40; k += 9) {
19097 for (uint32_t m = 1; m <= 1; m++) {
19098 GemmMicrokernelTester()
19099 .mr(1)
19100 .nr(4)
19101 .kr(2)
19102 .sr(1)
19103 .m(m)
19104 .n(n)
19105 .k(k)
19106 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019107 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019108 }
19109 }
19110 }
19111 }
19112
19113 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, strided_cm_subtile) {
19114 TEST_REQUIRES_X86_AVX;
19115 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019116 for (uint32_t n = 1; n <= 4; n++) {
19117 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019118 GemmMicrokernelTester()
19119 .mr(1)
19120 .nr(4)
19121 .kr(2)
19122 .sr(1)
19123 .m(m)
19124 .n(n)
19125 .k(k)
19126 .cm_stride(7)
19127 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019128 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019129 }
19130 }
19131 }
19132 }
19133
19134 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, qmin) {
19135 TEST_REQUIRES_X86_AVX;
19136 GemmMicrokernelTester()
19137 .mr(1)
19138 .nr(4)
19139 .kr(2)
19140 .sr(1)
19141 .m(1)
19142 .n(4)
19143 .k(8)
19144 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019145 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019146 }
19147
19148 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, qmax) {
19149 TEST_REQUIRES_X86_AVX;
19150 GemmMicrokernelTester()
19151 .mr(1)
19152 .nr(4)
19153 .kr(2)
19154 .sr(1)
19155 .m(1)
19156 .n(4)
19157 .k(8)
19158 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019159 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019160 }
19161
19162 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__AVX_LD128, strided_cm) {
19163 TEST_REQUIRES_X86_AVX;
19164 GemmMicrokernelTester()
19165 .mr(1)
19166 .nr(4)
19167 .kr(2)
19168 .sr(1)
19169 .m(1)
19170 .n(4)
19171 .k(8)
19172 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080019173 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019174 }
19175#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19176
19177
19178#if XNN_ARCH_X86 || XNN_ARCH_X86_64
19179 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8) {
19180 TEST_REQUIRES_X86_AVX;
19181 GemmMicrokernelTester()
19182 .mr(2)
19183 .nr(4)
19184 .kr(2)
19185 .sr(1)
19186 .m(2)
19187 .n(4)
19188 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080019189 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019190 }
19191
19192 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, strided_cn) {
19193 TEST_REQUIRES_X86_AVX;
19194 GemmMicrokernelTester()
19195 .mr(2)
19196 .nr(4)
19197 .kr(2)
19198 .sr(1)
19199 .m(2)
19200 .n(4)
19201 .k(8)
19202 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080019203 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019204 }
19205
19206 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8_strided_a) {
19207 TEST_REQUIRES_X86_AVX;
19208 GemmMicrokernelTester()
19209 .mr(2)
19210 .nr(4)
19211 .kr(2)
19212 .sr(1)
19213 .m(2)
19214 .n(4)
19215 .k(8)
19216 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080019217 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019218 }
19219
19220 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8_subtile) {
19221 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019222 for (uint32_t n = 1; n <= 4; n++) {
19223 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019224 GemmMicrokernelTester()
19225 .mr(2)
19226 .nr(4)
19227 .kr(2)
19228 .sr(1)
19229 .m(m)
19230 .n(n)
19231 .k(8)
19232 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019233 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019234 }
19235 }
19236 }
19237
19238 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8_subtile_m) {
19239 TEST_REQUIRES_X86_AVX;
19240 for (uint32_t m = 1; m <= 2; m++) {
19241 GemmMicrokernelTester()
19242 .mr(2)
19243 .nr(4)
19244 .kr(2)
19245 .sr(1)
19246 .m(m)
19247 .n(4)
19248 .k(8)
19249 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019250 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019251 }
19252 }
19253
19254 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8_subtile_n) {
19255 TEST_REQUIRES_X86_AVX;
19256 for (uint32_t n = 1; n <= 4; n++) {
19257 GemmMicrokernelTester()
19258 .mr(2)
19259 .nr(4)
19260 .kr(2)
19261 .sr(1)
19262 .m(2)
19263 .n(n)
19264 .k(8)
19265 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019266 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019267 }
19268 }
19269
19270 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_lt_8) {
19271 TEST_REQUIRES_X86_AVX;
19272 for (size_t k = 1; k < 8; k++) {
19273 GemmMicrokernelTester()
19274 .mr(2)
19275 .nr(4)
19276 .kr(2)
19277 .sr(1)
19278 .m(2)
19279 .n(4)
19280 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019281 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019282 }
19283 }
19284
19285 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_lt_8_strided_a) {
19286 TEST_REQUIRES_X86_AVX;
19287 for (size_t k = 1; k < 8; k++) {
19288 GemmMicrokernelTester()
19289 .mr(2)
19290 .nr(4)
19291 .kr(2)
19292 .sr(1)
19293 .m(2)
19294 .n(4)
19295 .k(k)
19296 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080019297 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019298 }
19299 }
19300
19301 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_lt_8_subtile) {
19302 TEST_REQUIRES_X86_AVX;
19303 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019304 for (uint32_t n = 1; n <= 4; n++) {
19305 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019306 GemmMicrokernelTester()
19307 .mr(2)
19308 .nr(4)
19309 .kr(2)
19310 .sr(1)
19311 .m(m)
19312 .n(n)
19313 .k(k)
19314 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019315 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019316 }
19317 }
19318 }
19319 }
19320
19321 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_gt_8) {
19322 TEST_REQUIRES_X86_AVX;
19323 for (size_t k = 9; k < 16; k++) {
19324 GemmMicrokernelTester()
19325 .mr(2)
19326 .nr(4)
19327 .kr(2)
19328 .sr(1)
19329 .m(2)
19330 .n(4)
19331 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019332 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019333 }
19334 }
19335
19336 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_gt_8_strided_a) {
19337 TEST_REQUIRES_X86_AVX;
19338 for (size_t k = 9; k < 16; k++) {
19339 GemmMicrokernelTester()
19340 .mr(2)
19341 .nr(4)
19342 .kr(2)
19343 .sr(1)
19344 .m(2)
19345 .n(4)
19346 .k(k)
19347 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019348 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019349 }
19350 }
19351
19352 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_gt_8_subtile) {
19353 TEST_REQUIRES_X86_AVX;
19354 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019355 for (uint32_t n = 1; n <= 4; n++) {
19356 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019357 GemmMicrokernelTester()
19358 .mr(2)
19359 .nr(4)
19360 .kr(2)
19361 .sr(1)
19362 .m(m)
19363 .n(n)
19364 .k(k)
19365 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019366 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019367 }
19368 }
19369 }
19370 }
19371
19372 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_div_8) {
19373 TEST_REQUIRES_X86_AVX;
19374 for (size_t k = 16; k <= 80; k += 8) {
19375 GemmMicrokernelTester()
19376 .mr(2)
19377 .nr(4)
19378 .kr(2)
19379 .sr(1)
19380 .m(2)
19381 .n(4)
19382 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019383 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019384 }
19385 }
19386
19387 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_div_8_strided_a) {
19388 TEST_REQUIRES_X86_AVX;
19389 for (size_t k = 16; k <= 80; k += 8) {
19390 GemmMicrokernelTester()
19391 .mr(2)
19392 .nr(4)
19393 .kr(2)
19394 .sr(1)
19395 .m(2)
19396 .n(4)
19397 .k(k)
19398 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080019399 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019400 }
19401 }
19402
19403 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_div_8_subtile) {
19404 TEST_REQUIRES_X86_AVX;
19405 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019406 for (uint32_t n = 1; n <= 4; n++) {
19407 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019408 GemmMicrokernelTester()
19409 .mr(2)
19410 .nr(4)
19411 .kr(2)
19412 .sr(1)
19413 .m(m)
19414 .n(n)
19415 .k(k)
19416 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019417 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019418 }
19419 }
19420 }
19421 }
19422
19423 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4) {
19424 TEST_REQUIRES_X86_AVX;
19425 for (uint32_t n = 5; n < 8; n++) {
19426 for (size_t k = 1; k <= 40; k += 9) {
19427 GemmMicrokernelTester()
19428 .mr(2)
19429 .nr(4)
19430 .kr(2)
19431 .sr(1)
19432 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019433 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070019434 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019435 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019436 }
19437 }
19438 }
19439
19440 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4_strided_cn) {
19441 TEST_REQUIRES_X86_AVX;
19442 for (uint32_t n = 5; n < 8; n++) {
19443 for (size_t k = 1; k <= 40; k += 9) {
19444 GemmMicrokernelTester()
19445 .mr(2)
19446 .nr(4)
19447 .kr(2)
19448 .sr(1)
19449 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019450 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070019451 .k(k)
19452 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080019453 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019454 }
19455 }
19456 }
19457
19458 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4_strided_a) {
19459 TEST_REQUIRES_X86_AVX;
19460 for (uint32_t n = 5; n < 8; n++) {
19461 for (size_t k = 1; k <= 40; k += 9) {
19462 GemmMicrokernelTester()
19463 .mr(2)
19464 .nr(4)
19465 .kr(2)
19466 .sr(1)
19467 .m(2)
19468 .n(n)
19469 .k(k)
19470 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080019471 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019472 }
19473 }
19474 }
19475
19476 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4_subtile) {
19477 TEST_REQUIRES_X86_AVX;
19478 for (uint32_t n = 5; n < 8; n++) {
19479 for (size_t k = 1; k <= 40; k += 9) {
19480 for (uint32_t m = 1; m <= 2; m++) {
19481 GemmMicrokernelTester()
19482 .mr(2)
19483 .nr(4)
19484 .kr(2)
19485 .sr(1)
19486 .m(m)
19487 .n(n)
19488 .k(k)
19489 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019490 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019491 }
19492 }
19493 }
19494 }
19495
19496 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4) {
19497 TEST_REQUIRES_X86_AVX;
19498 for (uint32_t n = 8; n <= 12; n += 4) {
19499 for (size_t k = 1; k <= 40; k += 9) {
19500 GemmMicrokernelTester()
19501 .mr(2)
19502 .nr(4)
19503 .kr(2)
19504 .sr(1)
19505 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019506 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070019507 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019508 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019509 }
19510 }
19511 }
19512
19513 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4_strided_cn) {
19514 TEST_REQUIRES_X86_AVX;
19515 for (uint32_t n = 8; n <= 12; n += 4) {
19516 for (size_t k = 1; k <= 40; k += 9) {
19517 GemmMicrokernelTester()
19518 .mr(2)
19519 .nr(4)
19520 .kr(2)
19521 .sr(1)
19522 .m(2)
19523 .n(n)
19524 .k(k)
19525 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080019526 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019527 }
19528 }
19529 }
19530
19531 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4_strided_a) {
19532 TEST_REQUIRES_X86_AVX;
19533 for (uint32_t n = 8; n <= 12; n += 4) {
19534 for (size_t k = 1; k <= 40; k += 9) {
19535 GemmMicrokernelTester()
19536 .mr(2)
19537 .nr(4)
19538 .kr(2)
19539 .sr(1)
19540 .m(2)
19541 .n(n)
19542 .k(k)
19543 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080019544 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019545 }
19546 }
19547 }
19548
19549 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4_subtile) {
19550 TEST_REQUIRES_X86_AVX;
19551 for (uint32_t n = 8; n <= 12; n += 4) {
19552 for (size_t k = 1; k <= 40; k += 9) {
19553 for (uint32_t m = 1; m <= 2; m++) {
19554 GemmMicrokernelTester()
19555 .mr(2)
19556 .nr(4)
19557 .kr(2)
19558 .sr(1)
19559 .m(m)
19560 .n(n)
19561 .k(k)
19562 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019563 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019564 }
19565 }
19566 }
19567 }
19568
19569 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, strided_cm_subtile) {
19570 TEST_REQUIRES_X86_AVX;
19571 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019572 for (uint32_t n = 1; n <= 4; n++) {
19573 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019574 GemmMicrokernelTester()
19575 .mr(2)
19576 .nr(4)
19577 .kr(2)
19578 .sr(1)
19579 .m(m)
19580 .n(n)
19581 .k(k)
19582 .cm_stride(7)
19583 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019584 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019585 }
19586 }
19587 }
19588 }
19589
19590 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, qmin) {
19591 TEST_REQUIRES_X86_AVX;
19592 GemmMicrokernelTester()
19593 .mr(2)
19594 .nr(4)
19595 .kr(2)
19596 .sr(1)
19597 .m(2)
19598 .n(4)
19599 .k(8)
19600 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019601 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019602 }
19603
19604 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, qmax) {
19605 TEST_REQUIRES_X86_AVX;
19606 GemmMicrokernelTester()
19607 .mr(2)
19608 .nr(4)
19609 .kr(2)
19610 .sr(1)
19611 .m(2)
19612 .n(4)
19613 .k(8)
19614 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019615 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019616 }
19617
19618 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__AVX_LD128, strided_cm) {
19619 TEST_REQUIRES_X86_AVX;
19620 GemmMicrokernelTester()
19621 .mr(2)
19622 .nr(4)
19623 .kr(2)
19624 .sr(1)
19625 .m(2)
19626 .n(4)
19627 .k(8)
19628 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080019629 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019630 }
19631#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19632
19633
19634#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanc46e6712021-06-01 19:00:16 -070019635 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8) {
19636 TEST_REQUIRES_X86_AVX;
19637 GemmMicrokernelTester()
19638 .mr(4)
19639 .nr(4)
19640 .kr(2)
19641 .sr(1)
19642 .m(4)
19643 .n(4)
19644 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080019645 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019646 }
19647
19648 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, strided_cn) {
19649 TEST_REQUIRES_X86_AVX;
19650 GemmMicrokernelTester()
19651 .mr(4)
19652 .nr(4)
19653 .kr(2)
19654 .sr(1)
19655 .m(4)
19656 .n(4)
19657 .k(8)
19658 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080019659 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019660 }
19661
19662 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8_strided_a) {
19663 TEST_REQUIRES_X86_AVX;
19664 GemmMicrokernelTester()
19665 .mr(4)
19666 .nr(4)
19667 .kr(2)
19668 .sr(1)
19669 .m(4)
19670 .n(4)
19671 .k(8)
19672 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080019673 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019674 }
19675
19676 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8_subtile) {
19677 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019678 for (uint32_t n = 1; n <= 4; n++) {
19679 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019680 GemmMicrokernelTester()
19681 .mr(4)
19682 .nr(4)
19683 .kr(2)
19684 .sr(1)
19685 .m(m)
19686 .n(n)
19687 .k(8)
19688 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019689 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019690 }
19691 }
19692 }
19693
19694 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8_subtile_m) {
19695 TEST_REQUIRES_X86_AVX;
19696 for (uint32_t m = 1; m <= 4; m++) {
19697 GemmMicrokernelTester()
19698 .mr(4)
19699 .nr(4)
19700 .kr(2)
19701 .sr(1)
19702 .m(m)
19703 .n(4)
19704 .k(8)
19705 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019706 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019707 }
19708 }
19709
19710 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8_subtile_n) {
19711 TEST_REQUIRES_X86_AVX;
19712 for (uint32_t n = 1; n <= 4; n++) {
19713 GemmMicrokernelTester()
19714 .mr(4)
19715 .nr(4)
19716 .kr(2)
19717 .sr(1)
19718 .m(4)
19719 .n(n)
19720 .k(8)
19721 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019722 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019723 }
19724 }
19725
19726 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_lt_8) {
19727 TEST_REQUIRES_X86_AVX;
19728 for (size_t k = 1; k < 8; k++) {
19729 GemmMicrokernelTester()
19730 .mr(4)
19731 .nr(4)
19732 .kr(2)
19733 .sr(1)
19734 .m(4)
19735 .n(4)
19736 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019737 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019738 }
19739 }
19740
19741 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_lt_8_strided_a) {
19742 TEST_REQUIRES_X86_AVX;
19743 for (size_t k = 1; k < 8; k++) {
19744 GemmMicrokernelTester()
19745 .mr(4)
19746 .nr(4)
19747 .kr(2)
19748 .sr(1)
19749 .m(4)
19750 .n(4)
19751 .k(k)
19752 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080019753 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019754 }
19755 }
19756
19757 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_lt_8_subtile) {
19758 TEST_REQUIRES_X86_AVX;
19759 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019760 for (uint32_t n = 1; n <= 4; n++) {
19761 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019762 GemmMicrokernelTester()
19763 .mr(4)
19764 .nr(4)
19765 .kr(2)
19766 .sr(1)
19767 .m(m)
19768 .n(n)
19769 .k(k)
19770 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019771 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019772 }
19773 }
19774 }
19775 }
19776
19777 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_gt_8) {
19778 TEST_REQUIRES_X86_AVX;
19779 for (size_t k = 9; k < 16; k++) {
19780 GemmMicrokernelTester()
19781 .mr(4)
19782 .nr(4)
19783 .kr(2)
19784 .sr(1)
19785 .m(4)
19786 .n(4)
19787 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019788 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019789 }
19790 }
19791
19792 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_gt_8_strided_a) {
19793 TEST_REQUIRES_X86_AVX;
19794 for (size_t k = 9; k < 16; k++) {
19795 GemmMicrokernelTester()
19796 .mr(4)
19797 .nr(4)
19798 .kr(2)
19799 .sr(1)
19800 .m(4)
19801 .n(4)
19802 .k(k)
19803 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019804 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019805 }
19806 }
19807
19808 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_gt_8_subtile) {
19809 TEST_REQUIRES_X86_AVX;
19810 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019811 for (uint32_t n = 1; n <= 4; n++) {
19812 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019813 GemmMicrokernelTester()
19814 .mr(4)
19815 .nr(4)
19816 .kr(2)
19817 .sr(1)
19818 .m(m)
19819 .n(n)
19820 .k(k)
19821 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019822 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019823 }
19824 }
19825 }
19826 }
19827
19828 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_div_8) {
19829 TEST_REQUIRES_X86_AVX;
19830 for (size_t k = 16; k <= 80; k += 8) {
19831 GemmMicrokernelTester()
19832 .mr(4)
19833 .nr(4)
19834 .kr(2)
19835 .sr(1)
19836 .m(4)
19837 .n(4)
19838 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019839 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019840 }
19841 }
19842
19843 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_div_8_strided_a) {
19844 TEST_REQUIRES_X86_AVX;
19845 for (size_t k = 16; k <= 80; k += 8) {
19846 GemmMicrokernelTester()
19847 .mr(4)
19848 .nr(4)
19849 .kr(2)
19850 .sr(1)
19851 .m(4)
19852 .n(4)
19853 .k(k)
19854 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080019855 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019856 }
19857 }
19858
19859 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_div_8_subtile) {
19860 TEST_REQUIRES_X86_AVX;
19861 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019862 for (uint32_t n = 1; n <= 4; n++) {
19863 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070019864 GemmMicrokernelTester()
19865 .mr(4)
19866 .nr(4)
19867 .kr(2)
19868 .sr(1)
19869 .m(m)
19870 .n(n)
19871 .k(k)
19872 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019873 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019874 }
19875 }
19876 }
19877 }
19878
19879 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4) {
19880 TEST_REQUIRES_X86_AVX;
19881 for (uint32_t n = 5; n < 8; n++) {
19882 for (size_t k = 1; k <= 40; k += 9) {
19883 GemmMicrokernelTester()
19884 .mr(4)
19885 .nr(4)
19886 .kr(2)
19887 .sr(1)
19888 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019889 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070019890 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019891 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019892 }
19893 }
19894 }
19895
19896 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4_strided_cn) {
19897 TEST_REQUIRES_X86_AVX;
19898 for (uint32_t n = 5; n < 8; n++) {
19899 for (size_t k = 1; k <= 40; k += 9) {
19900 GemmMicrokernelTester()
19901 .mr(4)
19902 .nr(4)
19903 .kr(2)
19904 .sr(1)
19905 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019906 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070019907 .k(k)
19908 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080019909 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019910 }
19911 }
19912 }
19913
19914 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4_strided_a) {
19915 TEST_REQUIRES_X86_AVX;
19916 for (uint32_t n = 5; n < 8; n++) {
19917 for (size_t k = 1; k <= 40; k += 9) {
19918 GemmMicrokernelTester()
19919 .mr(4)
19920 .nr(4)
19921 .kr(2)
19922 .sr(1)
19923 .m(4)
19924 .n(n)
19925 .k(k)
19926 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080019927 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019928 }
19929 }
19930 }
19931
19932 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4_subtile) {
19933 TEST_REQUIRES_X86_AVX;
19934 for (uint32_t n = 5; n < 8; n++) {
19935 for (size_t k = 1; k <= 40; k += 9) {
19936 for (uint32_t m = 1; m <= 4; m++) {
19937 GemmMicrokernelTester()
19938 .mr(4)
19939 .nr(4)
19940 .kr(2)
19941 .sr(1)
19942 .m(m)
19943 .n(n)
19944 .k(k)
19945 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019946 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019947 }
19948 }
19949 }
19950 }
19951
19952 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4) {
19953 TEST_REQUIRES_X86_AVX;
19954 for (uint32_t n = 8; n <= 12; n += 4) {
19955 for (size_t k = 1; k <= 40; k += 9) {
19956 GemmMicrokernelTester()
19957 .mr(4)
19958 .nr(4)
19959 .kr(2)
19960 .sr(1)
19961 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019962 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070019963 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019964 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019965 }
19966 }
19967 }
19968
19969 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4_strided_cn) {
19970 TEST_REQUIRES_X86_AVX;
19971 for (uint32_t n = 8; n <= 12; n += 4) {
19972 for (size_t k = 1; k <= 40; k += 9) {
19973 GemmMicrokernelTester()
19974 .mr(4)
19975 .nr(4)
19976 .kr(2)
19977 .sr(1)
19978 .m(4)
19979 .n(n)
19980 .k(k)
19981 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080019982 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070019983 }
19984 }
19985 }
19986
19987 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4_strided_a) {
19988 TEST_REQUIRES_X86_AVX;
19989 for (uint32_t n = 8; n <= 12; n += 4) {
19990 for (size_t k = 1; k <= 40; k += 9) {
19991 GemmMicrokernelTester()
19992 .mr(4)
19993 .nr(4)
19994 .kr(2)
19995 .sr(1)
19996 .m(4)
19997 .n(n)
19998 .k(k)
19999 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080020000 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020001 }
20002 }
20003 }
20004
20005 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4_subtile) {
20006 TEST_REQUIRES_X86_AVX;
20007 for (uint32_t n = 8; n <= 12; n += 4) {
20008 for (size_t k = 1; k <= 40; k += 9) {
20009 for (uint32_t m = 1; m <= 4; m++) {
20010 GemmMicrokernelTester()
20011 .mr(4)
20012 .nr(4)
20013 .kr(2)
20014 .sr(1)
20015 .m(m)
20016 .n(n)
20017 .k(k)
20018 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020019 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020020 }
20021 }
20022 }
20023 }
20024
20025 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, strided_cm_subtile) {
20026 TEST_REQUIRES_X86_AVX;
20027 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020028 for (uint32_t n = 1; n <= 4; n++) {
20029 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020030 GemmMicrokernelTester()
20031 .mr(4)
20032 .nr(4)
20033 .kr(2)
20034 .sr(1)
20035 .m(m)
20036 .n(n)
20037 .k(k)
20038 .cm_stride(7)
20039 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020040 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020041 }
20042 }
20043 }
20044 }
20045
20046 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, qmin) {
20047 TEST_REQUIRES_X86_AVX;
20048 GemmMicrokernelTester()
20049 .mr(4)
20050 .nr(4)
20051 .kr(2)
20052 .sr(1)
20053 .m(4)
20054 .n(4)
20055 .k(8)
20056 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020057 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020058 }
20059
20060 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, qmax) {
20061 TEST_REQUIRES_X86_AVX;
20062 GemmMicrokernelTester()
20063 .mr(4)
20064 .nr(4)
20065 .kr(2)
20066 .sr(1)
20067 .m(4)
20068 .n(4)
20069 .k(8)
20070 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020071 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020072 }
20073
20074 TEST(QS8_GEMM_MINMAX_FP32_4X4C2__AVX_LD128, strided_cm) {
20075 TEST_REQUIRES_X86_AVX;
20076 GemmMicrokernelTester()
20077 .mr(4)
20078 .nr(4)
20079 .kr(2)
20080 .sr(1)
20081 .m(4)
20082 .n(4)
20083 .k(8)
20084 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080020085 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020086 }
20087#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20088
20089
20090#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanc46e6712021-06-01 19:00:16 -070020091 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8) {
20092 TEST_REQUIRES_X86_XOP;
20093 GemmMicrokernelTester()
20094 .mr(3)
20095 .nr(4)
20096 .kr(2)
20097 .sr(1)
20098 .m(3)
20099 .n(4)
20100 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080020101 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020102 }
20103
20104 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, strided_cn) {
20105 TEST_REQUIRES_X86_XOP;
20106 GemmMicrokernelTester()
20107 .mr(3)
20108 .nr(4)
20109 .kr(2)
20110 .sr(1)
20111 .m(3)
20112 .n(4)
20113 .k(8)
20114 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080020115 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020116 }
20117
20118 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8_strided_a) {
20119 TEST_REQUIRES_X86_XOP;
20120 GemmMicrokernelTester()
20121 .mr(3)
20122 .nr(4)
20123 .kr(2)
20124 .sr(1)
20125 .m(3)
20126 .n(4)
20127 .k(8)
20128 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080020129 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020130 }
20131
20132 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8_subtile) {
20133 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020134 for (uint32_t n = 1; n <= 4; n++) {
20135 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020136 GemmMicrokernelTester()
20137 .mr(3)
20138 .nr(4)
20139 .kr(2)
20140 .sr(1)
20141 .m(m)
20142 .n(n)
20143 .k(8)
20144 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020145 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020146 }
20147 }
20148 }
20149
20150 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8_subtile_m) {
20151 TEST_REQUIRES_X86_XOP;
20152 for (uint32_t m = 1; m <= 3; m++) {
20153 GemmMicrokernelTester()
20154 .mr(3)
20155 .nr(4)
20156 .kr(2)
20157 .sr(1)
20158 .m(m)
20159 .n(4)
20160 .k(8)
20161 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020162 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020163 }
20164 }
20165
20166 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8_subtile_n) {
20167 TEST_REQUIRES_X86_XOP;
20168 for (uint32_t n = 1; n <= 4; n++) {
20169 GemmMicrokernelTester()
20170 .mr(3)
20171 .nr(4)
20172 .kr(2)
20173 .sr(1)
20174 .m(3)
20175 .n(n)
20176 .k(8)
20177 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020178 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020179 }
20180 }
20181
20182 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_lt_8) {
20183 TEST_REQUIRES_X86_XOP;
20184 for (size_t k = 1; k < 8; k++) {
20185 GemmMicrokernelTester()
20186 .mr(3)
20187 .nr(4)
20188 .kr(2)
20189 .sr(1)
20190 .m(3)
20191 .n(4)
20192 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020193 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020194 }
20195 }
20196
20197 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_lt_8_strided_a) {
20198 TEST_REQUIRES_X86_XOP;
20199 for (size_t k = 1; k < 8; k++) {
20200 GemmMicrokernelTester()
20201 .mr(3)
20202 .nr(4)
20203 .kr(2)
20204 .sr(1)
20205 .m(3)
20206 .n(4)
20207 .k(k)
20208 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080020209 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020210 }
20211 }
20212
20213 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_lt_8_subtile) {
20214 TEST_REQUIRES_X86_XOP;
20215 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020216 for (uint32_t n = 1; n <= 4; n++) {
20217 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020218 GemmMicrokernelTester()
20219 .mr(3)
20220 .nr(4)
20221 .kr(2)
20222 .sr(1)
20223 .m(m)
20224 .n(n)
20225 .k(k)
20226 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020227 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020228 }
20229 }
20230 }
20231 }
20232
20233 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_gt_8) {
20234 TEST_REQUIRES_X86_XOP;
20235 for (size_t k = 9; k < 16; k++) {
20236 GemmMicrokernelTester()
20237 .mr(3)
20238 .nr(4)
20239 .kr(2)
20240 .sr(1)
20241 .m(3)
20242 .n(4)
20243 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020244 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020245 }
20246 }
20247
20248 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_gt_8_strided_a) {
20249 TEST_REQUIRES_X86_XOP;
20250 for (size_t k = 9; k < 16; k++) {
20251 GemmMicrokernelTester()
20252 .mr(3)
20253 .nr(4)
20254 .kr(2)
20255 .sr(1)
20256 .m(3)
20257 .n(4)
20258 .k(k)
20259 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080020260 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020261 }
20262 }
20263
20264 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_gt_8_subtile) {
20265 TEST_REQUIRES_X86_XOP;
20266 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020267 for (uint32_t n = 1; n <= 4; n++) {
20268 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020269 GemmMicrokernelTester()
20270 .mr(3)
20271 .nr(4)
20272 .kr(2)
20273 .sr(1)
20274 .m(m)
20275 .n(n)
20276 .k(k)
20277 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020278 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020279 }
20280 }
20281 }
20282 }
20283
20284 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_div_8) {
20285 TEST_REQUIRES_X86_XOP;
20286 for (size_t k = 16; k <= 80; k += 8) {
20287 GemmMicrokernelTester()
20288 .mr(3)
20289 .nr(4)
20290 .kr(2)
20291 .sr(1)
20292 .m(3)
20293 .n(4)
20294 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020295 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020296 }
20297 }
20298
20299 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_div_8_strided_a) {
20300 TEST_REQUIRES_X86_XOP;
20301 for (size_t k = 16; k <= 80; k += 8) {
20302 GemmMicrokernelTester()
20303 .mr(3)
20304 .nr(4)
20305 .kr(2)
20306 .sr(1)
20307 .m(3)
20308 .n(4)
20309 .k(k)
20310 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080020311 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020312 }
20313 }
20314
20315 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_div_8_subtile) {
20316 TEST_REQUIRES_X86_XOP;
20317 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020318 for (uint32_t n = 1; n <= 4; n++) {
20319 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020320 GemmMicrokernelTester()
20321 .mr(3)
20322 .nr(4)
20323 .kr(2)
20324 .sr(1)
20325 .m(m)
20326 .n(n)
20327 .k(k)
20328 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020329 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020330 }
20331 }
20332 }
20333 }
20334
20335 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4) {
20336 TEST_REQUIRES_X86_XOP;
20337 for (uint32_t n = 5; n < 8; n++) {
20338 for (size_t k = 1; k <= 40; k += 9) {
20339 GemmMicrokernelTester()
20340 .mr(3)
20341 .nr(4)
20342 .kr(2)
20343 .sr(1)
20344 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020345 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070020346 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020347 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020348 }
20349 }
20350 }
20351
20352 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4_strided_cn) {
20353 TEST_REQUIRES_X86_XOP;
20354 for (uint32_t n = 5; n < 8; n++) {
20355 for (size_t k = 1; k <= 40; k += 9) {
20356 GemmMicrokernelTester()
20357 .mr(3)
20358 .nr(4)
20359 .kr(2)
20360 .sr(1)
20361 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020362 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070020363 .k(k)
20364 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080020365 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020366 }
20367 }
20368 }
20369
20370 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4_strided_a) {
20371 TEST_REQUIRES_X86_XOP;
20372 for (uint32_t n = 5; n < 8; n++) {
20373 for (size_t k = 1; k <= 40; k += 9) {
20374 GemmMicrokernelTester()
20375 .mr(3)
20376 .nr(4)
20377 .kr(2)
20378 .sr(1)
20379 .m(3)
20380 .n(n)
20381 .k(k)
20382 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080020383 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020384 }
20385 }
20386 }
20387
20388 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4_subtile) {
20389 TEST_REQUIRES_X86_XOP;
20390 for (uint32_t n = 5; n < 8; n++) {
20391 for (size_t k = 1; k <= 40; k += 9) {
20392 for (uint32_t m = 1; m <= 3; m++) {
20393 GemmMicrokernelTester()
20394 .mr(3)
20395 .nr(4)
20396 .kr(2)
20397 .sr(1)
20398 .m(m)
20399 .n(n)
20400 .k(k)
20401 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020402 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020403 }
20404 }
20405 }
20406 }
20407
20408 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4) {
20409 TEST_REQUIRES_X86_XOP;
20410 for (uint32_t n = 8; n <= 12; n += 4) {
20411 for (size_t k = 1; k <= 40; k += 9) {
20412 GemmMicrokernelTester()
20413 .mr(3)
20414 .nr(4)
20415 .kr(2)
20416 .sr(1)
20417 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020418 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070020419 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020420 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020421 }
20422 }
20423 }
20424
20425 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4_strided_cn) {
20426 TEST_REQUIRES_X86_XOP;
20427 for (uint32_t n = 8; n <= 12; n += 4) {
20428 for (size_t k = 1; k <= 40; k += 9) {
20429 GemmMicrokernelTester()
20430 .mr(3)
20431 .nr(4)
20432 .kr(2)
20433 .sr(1)
20434 .m(3)
20435 .n(n)
20436 .k(k)
20437 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080020438 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020439 }
20440 }
20441 }
20442
20443 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4_strided_a) {
20444 TEST_REQUIRES_X86_XOP;
20445 for (uint32_t n = 8; n <= 12; n += 4) {
20446 for (size_t k = 1; k <= 40; k += 9) {
20447 GemmMicrokernelTester()
20448 .mr(3)
20449 .nr(4)
20450 .kr(2)
20451 .sr(1)
20452 .m(3)
20453 .n(n)
20454 .k(k)
20455 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080020456 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020457 }
20458 }
20459 }
20460
20461 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4_subtile) {
20462 TEST_REQUIRES_X86_XOP;
20463 for (uint32_t n = 8; n <= 12; n += 4) {
20464 for (size_t k = 1; k <= 40; k += 9) {
20465 for (uint32_t m = 1; m <= 3; m++) {
20466 GemmMicrokernelTester()
20467 .mr(3)
20468 .nr(4)
20469 .kr(2)
20470 .sr(1)
20471 .m(m)
20472 .n(n)
20473 .k(k)
20474 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020475 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020476 }
20477 }
20478 }
20479 }
20480
20481 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, strided_cm_subtile) {
20482 TEST_REQUIRES_X86_XOP;
20483 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020484 for (uint32_t n = 1; n <= 4; n++) {
20485 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020486 GemmMicrokernelTester()
20487 .mr(3)
20488 .nr(4)
20489 .kr(2)
20490 .sr(1)
20491 .m(m)
20492 .n(n)
20493 .k(k)
20494 .cm_stride(7)
20495 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020496 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020497 }
20498 }
20499 }
20500 }
20501
20502 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, qmin) {
20503 TEST_REQUIRES_X86_XOP;
20504 GemmMicrokernelTester()
20505 .mr(3)
20506 .nr(4)
20507 .kr(2)
20508 .sr(1)
20509 .m(3)
20510 .n(4)
20511 .k(8)
20512 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020513 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020514 }
20515
20516 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, qmax) {
20517 TEST_REQUIRES_X86_XOP;
20518 GemmMicrokernelTester()
20519 .mr(3)
20520 .nr(4)
20521 .kr(2)
20522 .sr(1)
20523 .m(3)
20524 .n(4)
20525 .k(8)
20526 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020527 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020528 }
20529
20530 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__XOP_LD128, strided_cm) {
20531 TEST_REQUIRES_X86_XOP;
20532 GemmMicrokernelTester()
20533 .mr(3)
20534 .nr(4)
20535 .kr(2)
20536 .sr(1)
20537 .m(3)
20538 .n(4)
20539 .k(8)
20540 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080020541 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020542 }
20543#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20544
20545
20546#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanc46e6712021-06-01 19:00:16 -070020547 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8) {
20548 TEST_REQUIRES_X86_SSE2;
20549 GemmMicrokernelTester()
20550 .mr(3)
20551 .nr(4)
20552 .kr(8)
20553 .sr(1)
20554 .m(3)
20555 .n(4)
20556 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080020557 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020558 }
20559
20560 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, strided_cn) {
20561 TEST_REQUIRES_X86_SSE2;
20562 GemmMicrokernelTester()
20563 .mr(3)
20564 .nr(4)
20565 .kr(8)
20566 .sr(1)
20567 .m(3)
20568 .n(4)
20569 .k(8)
20570 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080020571 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020572 }
20573
20574 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8_strided_a) {
20575 TEST_REQUIRES_X86_SSE2;
20576 GemmMicrokernelTester()
20577 .mr(3)
20578 .nr(4)
20579 .kr(8)
20580 .sr(1)
20581 .m(3)
20582 .n(4)
20583 .k(8)
20584 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080020585 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020586 }
20587
20588 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8_subtile) {
20589 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020590 for (uint32_t n = 1; n <= 4; n++) {
20591 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020592 GemmMicrokernelTester()
20593 .mr(3)
20594 .nr(4)
20595 .kr(8)
20596 .sr(1)
20597 .m(m)
20598 .n(n)
20599 .k(8)
20600 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020601 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020602 }
20603 }
20604 }
20605
20606 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8_subtile_m) {
20607 TEST_REQUIRES_X86_SSE2;
20608 for (uint32_t m = 1; m <= 3; m++) {
20609 GemmMicrokernelTester()
20610 .mr(3)
20611 .nr(4)
20612 .kr(8)
20613 .sr(1)
20614 .m(m)
20615 .n(4)
20616 .k(8)
20617 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020618 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020619 }
20620 }
20621
20622 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8_subtile_n) {
20623 TEST_REQUIRES_X86_SSE2;
20624 for (uint32_t n = 1; n <= 4; n++) {
20625 GemmMicrokernelTester()
20626 .mr(3)
20627 .nr(4)
20628 .kr(8)
20629 .sr(1)
20630 .m(3)
20631 .n(n)
20632 .k(8)
20633 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020634 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020635 }
20636 }
20637
20638 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_lt_8) {
20639 TEST_REQUIRES_X86_SSE2;
20640 for (size_t k = 1; k < 8; k++) {
20641 GemmMicrokernelTester()
20642 .mr(3)
20643 .nr(4)
20644 .kr(8)
20645 .sr(1)
20646 .m(3)
20647 .n(4)
20648 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020649 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020650 }
20651 }
20652
20653 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_lt_8_strided_a) {
20654 TEST_REQUIRES_X86_SSE2;
20655 for (size_t k = 1; k < 8; k++) {
20656 GemmMicrokernelTester()
20657 .mr(3)
20658 .nr(4)
20659 .kr(8)
20660 .sr(1)
20661 .m(3)
20662 .n(4)
20663 .k(k)
20664 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080020665 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020666 }
20667 }
20668
20669 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_lt_8_subtile) {
20670 TEST_REQUIRES_X86_SSE2;
20671 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020672 for (uint32_t n = 1; n <= 4; n++) {
20673 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020674 GemmMicrokernelTester()
20675 .mr(3)
20676 .nr(4)
20677 .kr(8)
20678 .sr(1)
20679 .m(m)
20680 .n(n)
20681 .k(k)
20682 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020683 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020684 }
20685 }
20686 }
20687 }
20688
20689 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_gt_8) {
20690 TEST_REQUIRES_X86_SSE2;
20691 for (size_t k = 9; k < 16; k++) {
20692 GemmMicrokernelTester()
20693 .mr(3)
20694 .nr(4)
20695 .kr(8)
20696 .sr(1)
20697 .m(3)
20698 .n(4)
20699 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020700 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020701 }
20702 }
20703
20704 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_gt_8_strided_a) {
20705 TEST_REQUIRES_X86_SSE2;
20706 for (size_t k = 9; k < 16; k++) {
20707 GemmMicrokernelTester()
20708 .mr(3)
20709 .nr(4)
20710 .kr(8)
20711 .sr(1)
20712 .m(3)
20713 .n(4)
20714 .k(k)
20715 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080020716 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020717 }
20718 }
20719
20720 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_gt_8_subtile) {
20721 TEST_REQUIRES_X86_SSE2;
20722 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020723 for (uint32_t n = 1; n <= 4; n++) {
20724 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020725 GemmMicrokernelTester()
20726 .mr(3)
20727 .nr(4)
20728 .kr(8)
20729 .sr(1)
20730 .m(m)
20731 .n(n)
20732 .k(k)
20733 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020734 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020735 }
20736 }
20737 }
20738 }
20739
20740 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_div_8) {
20741 TEST_REQUIRES_X86_SSE2;
20742 for (size_t k = 16; k <= 80; k += 8) {
20743 GemmMicrokernelTester()
20744 .mr(3)
20745 .nr(4)
20746 .kr(8)
20747 .sr(1)
20748 .m(3)
20749 .n(4)
20750 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020751 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020752 }
20753 }
20754
20755 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_div_8_strided_a) {
20756 TEST_REQUIRES_X86_SSE2;
20757 for (size_t k = 16; k <= 80; k += 8) {
20758 GemmMicrokernelTester()
20759 .mr(3)
20760 .nr(4)
20761 .kr(8)
20762 .sr(1)
20763 .m(3)
20764 .n(4)
20765 .k(k)
20766 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080020767 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020768 }
20769 }
20770
20771 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_div_8_subtile) {
20772 TEST_REQUIRES_X86_SSE2;
20773 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020774 for (uint32_t n = 1; n <= 4; n++) {
20775 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020776 GemmMicrokernelTester()
20777 .mr(3)
20778 .nr(4)
20779 .kr(8)
20780 .sr(1)
20781 .m(m)
20782 .n(n)
20783 .k(k)
20784 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020785 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020786 }
20787 }
20788 }
20789 }
20790
20791 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4) {
20792 TEST_REQUIRES_X86_SSE2;
20793 for (uint32_t n = 5; n < 8; n++) {
20794 for (size_t k = 1; k <= 40; k += 9) {
20795 GemmMicrokernelTester()
20796 .mr(3)
20797 .nr(4)
20798 .kr(8)
20799 .sr(1)
20800 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020801 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070020802 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020803 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020804 }
20805 }
20806 }
20807
20808 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4_strided_cn) {
20809 TEST_REQUIRES_X86_SSE2;
20810 for (uint32_t n = 5; n < 8; n++) {
20811 for (size_t k = 1; k <= 40; k += 9) {
20812 GemmMicrokernelTester()
20813 .mr(3)
20814 .nr(4)
20815 .kr(8)
20816 .sr(1)
20817 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020818 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070020819 .k(k)
20820 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080020821 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020822 }
20823 }
20824 }
20825
20826 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4_strided_a) {
20827 TEST_REQUIRES_X86_SSE2;
20828 for (uint32_t n = 5; n < 8; n++) {
20829 for (size_t k = 1; k <= 40; k += 9) {
20830 GemmMicrokernelTester()
20831 .mr(3)
20832 .nr(4)
20833 .kr(8)
20834 .sr(1)
20835 .m(3)
20836 .n(n)
20837 .k(k)
20838 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080020839 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020840 }
20841 }
20842 }
20843
20844 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4_subtile) {
20845 TEST_REQUIRES_X86_SSE2;
20846 for (uint32_t n = 5; n < 8; n++) {
20847 for (size_t k = 1; k <= 40; k += 9) {
20848 for (uint32_t m = 1; m <= 3; m++) {
20849 GemmMicrokernelTester()
20850 .mr(3)
20851 .nr(4)
20852 .kr(8)
20853 .sr(1)
20854 .m(m)
20855 .n(n)
20856 .k(k)
20857 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020858 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020859 }
20860 }
20861 }
20862 }
20863
20864 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4) {
20865 TEST_REQUIRES_X86_SSE2;
20866 for (uint32_t n = 8; n <= 12; n += 4) {
20867 for (size_t k = 1; k <= 40; k += 9) {
20868 GemmMicrokernelTester()
20869 .mr(3)
20870 .nr(4)
20871 .kr(8)
20872 .sr(1)
20873 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020874 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070020875 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020876 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020877 }
20878 }
20879 }
20880
20881 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4_strided_cn) {
20882 TEST_REQUIRES_X86_SSE2;
20883 for (uint32_t n = 8; n <= 12; n += 4) {
20884 for (size_t k = 1; k <= 40; k += 9) {
20885 GemmMicrokernelTester()
20886 .mr(3)
20887 .nr(4)
20888 .kr(8)
20889 .sr(1)
20890 .m(3)
20891 .n(n)
20892 .k(k)
20893 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080020894 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020895 }
20896 }
20897 }
20898
20899 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4_strided_a) {
20900 TEST_REQUIRES_X86_SSE2;
20901 for (uint32_t n = 8; n <= 12; n += 4) {
20902 for (size_t k = 1; k <= 40; k += 9) {
20903 GemmMicrokernelTester()
20904 .mr(3)
20905 .nr(4)
20906 .kr(8)
20907 .sr(1)
20908 .m(3)
20909 .n(n)
20910 .k(k)
20911 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080020912 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020913 }
20914 }
20915 }
20916
20917 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4_subtile) {
20918 TEST_REQUIRES_X86_SSE2;
20919 for (uint32_t n = 8; n <= 12; n += 4) {
20920 for (size_t k = 1; k <= 40; k += 9) {
20921 for (uint32_t m = 1; m <= 3; m++) {
20922 GemmMicrokernelTester()
20923 .mr(3)
20924 .nr(4)
20925 .kr(8)
20926 .sr(1)
20927 .m(m)
20928 .n(n)
20929 .k(k)
20930 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020931 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020932 }
20933 }
20934 }
20935 }
20936
20937 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, strided_cm_subtile) {
20938 TEST_REQUIRES_X86_SSE2;
20939 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020940 for (uint32_t n = 1; n <= 4; n++) {
20941 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070020942 GemmMicrokernelTester()
20943 .mr(3)
20944 .nr(4)
20945 .kr(8)
20946 .sr(1)
20947 .m(m)
20948 .n(n)
20949 .k(k)
20950 .cm_stride(7)
20951 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020952 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020953 }
20954 }
20955 }
20956 }
20957
20958 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, qmin) {
20959 TEST_REQUIRES_X86_SSE2;
20960 GemmMicrokernelTester()
20961 .mr(3)
20962 .nr(4)
20963 .kr(8)
20964 .sr(1)
20965 .m(3)
20966 .n(4)
20967 .k(8)
20968 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020969 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020970 }
20971
20972 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, qmax) {
20973 TEST_REQUIRES_X86_SSE2;
20974 GemmMicrokernelTester()
20975 .mr(3)
20976 .nr(4)
20977 .kr(8)
20978 .sr(1)
20979 .m(3)
20980 .n(4)
20981 .k(8)
20982 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020983 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020984 }
20985
20986 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD64, strided_cm) {
20987 TEST_REQUIRES_X86_SSE2;
20988 GemmMicrokernelTester()
20989 .mr(3)
20990 .nr(4)
20991 .kr(8)
20992 .sr(1)
20993 .m(3)
20994 .n(4)
20995 .k(8)
20996 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080020997 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070020998 }
20999#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21000
21001
21002#if XNN_ARCH_X86 || XNN_ARCH_X86_64
21003 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, k_eq_8) {
21004 TEST_REQUIRES_X86_SSSE3;
21005 GemmMicrokernelTester()
21006 .mr(1)
21007 .nr(4)
21008 .kr(8)
21009 .sr(1)
21010 .m(1)
21011 .n(4)
21012 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080021013 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021014 }
21015
21016 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, strided_cn) {
21017 TEST_REQUIRES_X86_SSSE3;
21018 GemmMicrokernelTester()
21019 .mr(1)
21020 .nr(4)
21021 .kr(8)
21022 .sr(1)
21023 .m(1)
21024 .n(4)
21025 .k(8)
21026 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021027 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021028 }
21029
21030 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, k_eq_8_strided_a) {
21031 TEST_REQUIRES_X86_SSSE3;
21032 GemmMicrokernelTester()
21033 .mr(1)
21034 .nr(4)
21035 .kr(8)
21036 .sr(1)
21037 .m(1)
21038 .n(4)
21039 .k(8)
21040 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080021041 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021042 }
21043
21044 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, k_eq_8_subtile) {
21045 TEST_REQUIRES_X86_SSSE3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080021046 for (uint32_t n = 1; n <= 4; n++) {
21047 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021048 GemmMicrokernelTester()
21049 .mr(1)
21050 .nr(4)
21051 .kr(8)
21052 .sr(1)
21053 .m(m)
21054 .n(n)
21055 .k(8)
21056 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021057 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021058 }
21059 }
21060 }
21061
21062 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, k_eq_8_subtile_m) {
21063 TEST_REQUIRES_X86_SSSE3;
21064 for (uint32_t m = 1; m <= 1; m++) {
21065 GemmMicrokernelTester()
21066 .mr(1)
21067 .nr(4)
21068 .kr(8)
21069 .sr(1)
21070 .m(m)
21071 .n(4)
21072 .k(8)
21073 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021074 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021075 }
21076 }
21077
21078 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, k_eq_8_subtile_n) {
21079 TEST_REQUIRES_X86_SSSE3;
21080 for (uint32_t n = 1; n <= 4; n++) {
21081 GemmMicrokernelTester()
21082 .mr(1)
21083 .nr(4)
21084 .kr(8)
21085 .sr(1)
21086 .m(1)
21087 .n(n)
21088 .k(8)
21089 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021090 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021091 }
21092 }
21093
21094 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, k_lt_8) {
21095 TEST_REQUIRES_X86_SSSE3;
21096 for (size_t k = 1; k < 8; k++) {
21097 GemmMicrokernelTester()
21098 .mr(1)
21099 .nr(4)
21100 .kr(8)
21101 .sr(1)
21102 .m(1)
21103 .n(4)
21104 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021105 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021106 }
21107 }
21108
21109 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, k_lt_8_strided_a) {
21110 TEST_REQUIRES_X86_SSSE3;
21111 for (size_t k = 1; k < 8; k++) {
21112 GemmMicrokernelTester()
21113 .mr(1)
21114 .nr(4)
21115 .kr(8)
21116 .sr(1)
21117 .m(1)
21118 .n(4)
21119 .k(k)
21120 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080021121 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021122 }
21123 }
21124
21125 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, k_lt_8_subtile) {
21126 TEST_REQUIRES_X86_SSSE3;
21127 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021128 for (uint32_t n = 1; n <= 4; n++) {
21129 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021130 GemmMicrokernelTester()
21131 .mr(1)
21132 .nr(4)
21133 .kr(8)
21134 .sr(1)
21135 .m(m)
21136 .n(n)
21137 .k(k)
21138 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021139 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021140 }
21141 }
21142 }
21143 }
21144
21145 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, k_gt_8) {
21146 TEST_REQUIRES_X86_SSSE3;
21147 for (size_t k = 9; k < 16; k++) {
21148 GemmMicrokernelTester()
21149 .mr(1)
21150 .nr(4)
21151 .kr(8)
21152 .sr(1)
21153 .m(1)
21154 .n(4)
21155 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021156 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021157 }
21158 }
21159
21160 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, k_gt_8_strided_a) {
21161 TEST_REQUIRES_X86_SSSE3;
21162 for (size_t k = 9; k < 16; k++) {
21163 GemmMicrokernelTester()
21164 .mr(1)
21165 .nr(4)
21166 .kr(8)
21167 .sr(1)
21168 .m(1)
21169 .n(4)
21170 .k(k)
21171 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080021172 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021173 }
21174 }
21175
21176 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, k_gt_8_subtile) {
21177 TEST_REQUIRES_X86_SSSE3;
21178 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021179 for (uint32_t n = 1; n <= 4; n++) {
21180 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021181 GemmMicrokernelTester()
21182 .mr(1)
21183 .nr(4)
21184 .kr(8)
21185 .sr(1)
21186 .m(m)
21187 .n(n)
21188 .k(k)
21189 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021190 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021191 }
21192 }
21193 }
21194 }
21195
21196 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, k_div_8) {
21197 TEST_REQUIRES_X86_SSSE3;
21198 for (size_t k = 16; k <= 80; k += 8) {
21199 GemmMicrokernelTester()
21200 .mr(1)
21201 .nr(4)
21202 .kr(8)
21203 .sr(1)
21204 .m(1)
21205 .n(4)
21206 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021207 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021208 }
21209 }
21210
21211 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, k_div_8_strided_a) {
21212 TEST_REQUIRES_X86_SSSE3;
21213 for (size_t k = 16; k <= 80; k += 8) {
21214 GemmMicrokernelTester()
21215 .mr(1)
21216 .nr(4)
21217 .kr(8)
21218 .sr(1)
21219 .m(1)
21220 .n(4)
21221 .k(k)
21222 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080021223 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021224 }
21225 }
21226
21227 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, k_div_8_subtile) {
21228 TEST_REQUIRES_X86_SSSE3;
21229 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021230 for (uint32_t n = 1; n <= 4; n++) {
21231 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021232 GemmMicrokernelTester()
21233 .mr(1)
21234 .nr(4)
21235 .kr(8)
21236 .sr(1)
21237 .m(m)
21238 .n(n)
21239 .k(k)
21240 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021241 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021242 }
21243 }
21244 }
21245 }
21246
21247 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, n_gt_4) {
21248 TEST_REQUIRES_X86_SSSE3;
21249 for (uint32_t n = 5; n < 8; n++) {
21250 for (size_t k = 1; k <= 40; k += 9) {
21251 GemmMicrokernelTester()
21252 .mr(1)
21253 .nr(4)
21254 .kr(8)
21255 .sr(1)
21256 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021257 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070021258 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021259 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021260 }
21261 }
21262 }
21263
21264 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, n_gt_4_strided_cn) {
21265 TEST_REQUIRES_X86_SSSE3;
21266 for (uint32_t n = 5; n < 8; n++) {
21267 for (size_t k = 1; k <= 40; k += 9) {
21268 GemmMicrokernelTester()
21269 .mr(1)
21270 .nr(4)
21271 .kr(8)
21272 .sr(1)
21273 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021274 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070021275 .k(k)
21276 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021277 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021278 }
21279 }
21280 }
21281
21282 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, n_gt_4_strided_a) {
21283 TEST_REQUIRES_X86_SSSE3;
21284 for (uint32_t n = 5; n < 8; n++) {
21285 for (size_t k = 1; k <= 40; k += 9) {
21286 GemmMicrokernelTester()
21287 .mr(1)
21288 .nr(4)
21289 .kr(8)
21290 .sr(1)
21291 .m(1)
21292 .n(n)
21293 .k(k)
21294 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080021295 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021296 }
21297 }
21298 }
21299
21300 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, n_gt_4_subtile) {
21301 TEST_REQUIRES_X86_SSSE3;
21302 for (uint32_t n = 5; n < 8; n++) {
21303 for (size_t k = 1; k <= 40; k += 9) {
21304 for (uint32_t m = 1; m <= 1; m++) {
21305 GemmMicrokernelTester()
21306 .mr(1)
21307 .nr(4)
21308 .kr(8)
21309 .sr(1)
21310 .m(m)
21311 .n(n)
21312 .k(k)
21313 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021314 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021315 }
21316 }
21317 }
21318 }
21319
21320 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, n_div_4) {
21321 TEST_REQUIRES_X86_SSSE3;
21322 for (uint32_t n = 8; n <= 12; n += 4) {
21323 for (size_t k = 1; k <= 40; k += 9) {
21324 GemmMicrokernelTester()
21325 .mr(1)
21326 .nr(4)
21327 .kr(8)
21328 .sr(1)
21329 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021330 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070021331 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021332 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021333 }
21334 }
21335 }
21336
21337 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, n_div_4_strided_cn) {
21338 TEST_REQUIRES_X86_SSSE3;
21339 for (uint32_t n = 8; n <= 12; n += 4) {
21340 for (size_t k = 1; k <= 40; k += 9) {
21341 GemmMicrokernelTester()
21342 .mr(1)
21343 .nr(4)
21344 .kr(8)
21345 .sr(1)
21346 .m(1)
21347 .n(n)
21348 .k(k)
21349 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021350 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021351 }
21352 }
21353 }
21354
21355 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, n_div_4_strided_a) {
21356 TEST_REQUIRES_X86_SSSE3;
21357 for (uint32_t n = 8; n <= 12; n += 4) {
21358 for (size_t k = 1; k <= 40; k += 9) {
21359 GemmMicrokernelTester()
21360 .mr(1)
21361 .nr(4)
21362 .kr(8)
21363 .sr(1)
21364 .m(1)
21365 .n(n)
21366 .k(k)
21367 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080021368 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021369 }
21370 }
21371 }
21372
21373 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, n_div_4_subtile) {
21374 TEST_REQUIRES_X86_SSSE3;
21375 for (uint32_t n = 8; n <= 12; n += 4) {
21376 for (size_t k = 1; k <= 40; k += 9) {
21377 for (uint32_t m = 1; m <= 1; m++) {
21378 GemmMicrokernelTester()
21379 .mr(1)
21380 .nr(4)
21381 .kr(8)
21382 .sr(1)
21383 .m(m)
21384 .n(n)
21385 .k(k)
21386 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021387 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021388 }
21389 }
21390 }
21391 }
21392
21393 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, strided_cm_subtile) {
21394 TEST_REQUIRES_X86_SSSE3;
21395 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021396 for (uint32_t n = 1; n <= 4; n++) {
21397 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021398 GemmMicrokernelTester()
21399 .mr(1)
21400 .nr(4)
21401 .kr(8)
21402 .sr(1)
21403 .m(m)
21404 .n(n)
21405 .k(k)
21406 .cm_stride(7)
21407 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021408 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021409 }
21410 }
21411 }
21412 }
21413
21414 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, qmin) {
21415 TEST_REQUIRES_X86_SSSE3;
21416 GemmMicrokernelTester()
21417 .mr(1)
21418 .nr(4)
21419 .kr(8)
21420 .sr(1)
21421 .m(1)
21422 .n(4)
21423 .k(8)
21424 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021425 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021426 }
21427
21428 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, qmax) {
21429 TEST_REQUIRES_X86_SSSE3;
21430 GemmMicrokernelTester()
21431 .mr(1)
21432 .nr(4)
21433 .kr(8)
21434 .sr(1)
21435 .m(1)
21436 .n(4)
21437 .k(8)
21438 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021439 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021440 }
21441
21442 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD64, strided_cm) {
21443 TEST_REQUIRES_X86_SSSE3;
21444 GemmMicrokernelTester()
21445 .mr(1)
21446 .nr(4)
21447 .kr(8)
21448 .sr(1)
21449 .m(1)
21450 .n(4)
21451 .k(8)
21452 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021453 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021454 }
21455#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21456
21457
21458#if XNN_ARCH_X86 || XNN_ARCH_X86_64
21459 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, k_eq_8) {
21460 TEST_REQUIRES_X86_SSSE3;
21461 GemmMicrokernelTester()
21462 .mr(2)
21463 .nr(4)
21464 .kr(8)
21465 .sr(1)
21466 .m(2)
21467 .n(4)
21468 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080021469 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021470 }
21471
21472 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, strided_cn) {
21473 TEST_REQUIRES_X86_SSSE3;
21474 GemmMicrokernelTester()
21475 .mr(2)
21476 .nr(4)
21477 .kr(8)
21478 .sr(1)
21479 .m(2)
21480 .n(4)
21481 .k(8)
21482 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021483 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021484 }
21485
21486 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, k_eq_8_strided_a) {
21487 TEST_REQUIRES_X86_SSSE3;
21488 GemmMicrokernelTester()
21489 .mr(2)
21490 .nr(4)
21491 .kr(8)
21492 .sr(1)
21493 .m(2)
21494 .n(4)
21495 .k(8)
21496 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080021497 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021498 }
21499
21500 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, k_eq_8_subtile) {
21501 TEST_REQUIRES_X86_SSSE3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080021502 for (uint32_t n = 1; n <= 4; n++) {
21503 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021504 GemmMicrokernelTester()
21505 .mr(2)
21506 .nr(4)
21507 .kr(8)
21508 .sr(1)
21509 .m(m)
21510 .n(n)
21511 .k(8)
21512 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021513 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021514 }
21515 }
21516 }
21517
21518 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, k_eq_8_subtile_m) {
21519 TEST_REQUIRES_X86_SSSE3;
21520 for (uint32_t m = 1; m <= 2; m++) {
21521 GemmMicrokernelTester()
21522 .mr(2)
21523 .nr(4)
21524 .kr(8)
21525 .sr(1)
21526 .m(m)
21527 .n(4)
21528 .k(8)
21529 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021530 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021531 }
21532 }
21533
21534 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, k_eq_8_subtile_n) {
21535 TEST_REQUIRES_X86_SSSE3;
21536 for (uint32_t n = 1; n <= 4; n++) {
21537 GemmMicrokernelTester()
21538 .mr(2)
21539 .nr(4)
21540 .kr(8)
21541 .sr(1)
21542 .m(2)
21543 .n(n)
21544 .k(8)
21545 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021546 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021547 }
21548 }
21549
21550 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, k_lt_8) {
21551 TEST_REQUIRES_X86_SSSE3;
21552 for (size_t k = 1; k < 8; k++) {
21553 GemmMicrokernelTester()
21554 .mr(2)
21555 .nr(4)
21556 .kr(8)
21557 .sr(1)
21558 .m(2)
21559 .n(4)
21560 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021561 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021562 }
21563 }
21564
21565 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, k_lt_8_strided_a) {
21566 TEST_REQUIRES_X86_SSSE3;
21567 for (size_t k = 1; k < 8; k++) {
21568 GemmMicrokernelTester()
21569 .mr(2)
21570 .nr(4)
21571 .kr(8)
21572 .sr(1)
21573 .m(2)
21574 .n(4)
21575 .k(k)
21576 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080021577 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021578 }
21579 }
21580
21581 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, k_lt_8_subtile) {
21582 TEST_REQUIRES_X86_SSSE3;
21583 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021584 for (uint32_t n = 1; n <= 4; n++) {
21585 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021586 GemmMicrokernelTester()
21587 .mr(2)
21588 .nr(4)
21589 .kr(8)
21590 .sr(1)
21591 .m(m)
21592 .n(n)
21593 .k(k)
21594 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021595 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021596 }
21597 }
21598 }
21599 }
21600
21601 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, k_gt_8) {
21602 TEST_REQUIRES_X86_SSSE3;
21603 for (size_t k = 9; k < 16; k++) {
21604 GemmMicrokernelTester()
21605 .mr(2)
21606 .nr(4)
21607 .kr(8)
21608 .sr(1)
21609 .m(2)
21610 .n(4)
21611 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021612 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021613 }
21614 }
21615
21616 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, k_gt_8_strided_a) {
21617 TEST_REQUIRES_X86_SSSE3;
21618 for (size_t k = 9; k < 16; k++) {
21619 GemmMicrokernelTester()
21620 .mr(2)
21621 .nr(4)
21622 .kr(8)
21623 .sr(1)
21624 .m(2)
21625 .n(4)
21626 .k(k)
21627 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080021628 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021629 }
21630 }
21631
21632 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, k_gt_8_subtile) {
21633 TEST_REQUIRES_X86_SSSE3;
21634 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021635 for (uint32_t n = 1; n <= 4; n++) {
21636 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021637 GemmMicrokernelTester()
21638 .mr(2)
21639 .nr(4)
21640 .kr(8)
21641 .sr(1)
21642 .m(m)
21643 .n(n)
21644 .k(k)
21645 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021646 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021647 }
21648 }
21649 }
21650 }
21651
21652 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, k_div_8) {
21653 TEST_REQUIRES_X86_SSSE3;
21654 for (size_t k = 16; k <= 80; k += 8) {
21655 GemmMicrokernelTester()
21656 .mr(2)
21657 .nr(4)
21658 .kr(8)
21659 .sr(1)
21660 .m(2)
21661 .n(4)
21662 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021663 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021664 }
21665 }
21666
21667 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, k_div_8_strided_a) {
21668 TEST_REQUIRES_X86_SSSE3;
21669 for (size_t k = 16; k <= 80; k += 8) {
21670 GemmMicrokernelTester()
21671 .mr(2)
21672 .nr(4)
21673 .kr(8)
21674 .sr(1)
21675 .m(2)
21676 .n(4)
21677 .k(k)
21678 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080021679 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021680 }
21681 }
21682
21683 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, k_div_8_subtile) {
21684 TEST_REQUIRES_X86_SSSE3;
21685 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021686 for (uint32_t n = 1; n <= 4; n++) {
21687 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021688 GemmMicrokernelTester()
21689 .mr(2)
21690 .nr(4)
21691 .kr(8)
21692 .sr(1)
21693 .m(m)
21694 .n(n)
21695 .k(k)
21696 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021697 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021698 }
21699 }
21700 }
21701 }
21702
21703 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, n_gt_4) {
21704 TEST_REQUIRES_X86_SSSE3;
21705 for (uint32_t n = 5; n < 8; n++) {
21706 for (size_t k = 1; k <= 40; k += 9) {
21707 GemmMicrokernelTester()
21708 .mr(2)
21709 .nr(4)
21710 .kr(8)
21711 .sr(1)
21712 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021713 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070021714 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021715 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021716 }
21717 }
21718 }
21719
21720 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, n_gt_4_strided_cn) {
21721 TEST_REQUIRES_X86_SSSE3;
21722 for (uint32_t n = 5; n < 8; n++) {
21723 for (size_t k = 1; k <= 40; k += 9) {
21724 GemmMicrokernelTester()
21725 .mr(2)
21726 .nr(4)
21727 .kr(8)
21728 .sr(1)
21729 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021730 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070021731 .k(k)
21732 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021733 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021734 }
21735 }
21736 }
21737
21738 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, n_gt_4_strided_a) {
21739 TEST_REQUIRES_X86_SSSE3;
21740 for (uint32_t n = 5; n < 8; n++) {
21741 for (size_t k = 1; k <= 40; k += 9) {
21742 GemmMicrokernelTester()
21743 .mr(2)
21744 .nr(4)
21745 .kr(8)
21746 .sr(1)
21747 .m(2)
21748 .n(n)
21749 .k(k)
21750 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080021751 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021752 }
21753 }
21754 }
21755
21756 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, n_gt_4_subtile) {
21757 TEST_REQUIRES_X86_SSSE3;
21758 for (uint32_t n = 5; n < 8; n++) {
21759 for (size_t k = 1; k <= 40; k += 9) {
21760 for (uint32_t m = 1; m <= 2; m++) {
21761 GemmMicrokernelTester()
21762 .mr(2)
21763 .nr(4)
21764 .kr(8)
21765 .sr(1)
21766 .m(m)
21767 .n(n)
21768 .k(k)
21769 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021770 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021771 }
21772 }
21773 }
21774 }
21775
21776 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, n_div_4) {
21777 TEST_REQUIRES_X86_SSSE3;
21778 for (uint32_t n = 8; n <= 12; n += 4) {
21779 for (size_t k = 1; k <= 40; k += 9) {
21780 GemmMicrokernelTester()
21781 .mr(2)
21782 .nr(4)
21783 .kr(8)
21784 .sr(1)
21785 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021786 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070021787 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021788 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021789 }
21790 }
21791 }
21792
21793 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, n_div_4_strided_cn) {
21794 TEST_REQUIRES_X86_SSSE3;
21795 for (uint32_t n = 8; n <= 12; n += 4) {
21796 for (size_t k = 1; k <= 40; k += 9) {
21797 GemmMicrokernelTester()
21798 .mr(2)
21799 .nr(4)
21800 .kr(8)
21801 .sr(1)
21802 .m(2)
21803 .n(n)
21804 .k(k)
21805 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021806 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021807 }
21808 }
21809 }
21810
21811 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, n_div_4_strided_a) {
21812 TEST_REQUIRES_X86_SSSE3;
21813 for (uint32_t n = 8; n <= 12; n += 4) {
21814 for (size_t k = 1; k <= 40; k += 9) {
21815 GemmMicrokernelTester()
21816 .mr(2)
21817 .nr(4)
21818 .kr(8)
21819 .sr(1)
21820 .m(2)
21821 .n(n)
21822 .k(k)
21823 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080021824 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021825 }
21826 }
21827 }
21828
21829 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, n_div_4_subtile) {
21830 TEST_REQUIRES_X86_SSSE3;
21831 for (uint32_t n = 8; n <= 12; n += 4) {
21832 for (size_t k = 1; k <= 40; k += 9) {
21833 for (uint32_t m = 1; m <= 2; m++) {
21834 GemmMicrokernelTester()
21835 .mr(2)
21836 .nr(4)
21837 .kr(8)
21838 .sr(1)
21839 .m(m)
21840 .n(n)
21841 .k(k)
21842 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021843 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021844 }
21845 }
21846 }
21847 }
21848
21849 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, strided_cm_subtile) {
21850 TEST_REQUIRES_X86_SSSE3;
21851 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021852 for (uint32_t n = 1; n <= 4; n++) {
21853 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021854 GemmMicrokernelTester()
21855 .mr(2)
21856 .nr(4)
21857 .kr(8)
21858 .sr(1)
21859 .m(m)
21860 .n(n)
21861 .k(k)
21862 .cm_stride(7)
21863 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021864 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021865 }
21866 }
21867 }
21868 }
21869
21870 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, qmin) {
21871 TEST_REQUIRES_X86_SSSE3;
21872 GemmMicrokernelTester()
21873 .mr(2)
21874 .nr(4)
21875 .kr(8)
21876 .sr(1)
21877 .m(2)
21878 .n(4)
21879 .k(8)
21880 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021881 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021882 }
21883
21884 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, qmax) {
21885 TEST_REQUIRES_X86_SSSE3;
21886 GemmMicrokernelTester()
21887 .mr(2)
21888 .nr(4)
21889 .kr(8)
21890 .sr(1)
21891 .m(2)
21892 .n(4)
21893 .k(8)
21894 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021895 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021896 }
21897
21898 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD64, strided_cm) {
21899 TEST_REQUIRES_X86_SSSE3;
21900 GemmMicrokernelTester()
21901 .mr(2)
21902 .nr(4)
21903 .kr(8)
21904 .sr(1)
21905 .m(2)
21906 .n(4)
21907 .k(8)
21908 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021909 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021910 }
21911#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21912
21913
21914#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanc46e6712021-06-01 19:00:16 -070021915 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8) {
21916 TEST_REQUIRES_X86_SSE41;
21917 GemmMicrokernelTester()
21918 .mr(1)
21919 .nr(4)
21920 .kr(8)
21921 .sr(1)
21922 .m(1)
21923 .n(4)
21924 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080021925 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021926 }
21927
21928 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, strided_cn) {
21929 TEST_REQUIRES_X86_SSE41;
21930 GemmMicrokernelTester()
21931 .mr(1)
21932 .nr(4)
21933 .kr(8)
21934 .sr(1)
21935 .m(1)
21936 .n(4)
21937 .k(8)
21938 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021939 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021940 }
21941
21942 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8_strided_a) {
21943 TEST_REQUIRES_X86_SSE41;
21944 GemmMicrokernelTester()
21945 .mr(1)
21946 .nr(4)
21947 .kr(8)
21948 .sr(1)
21949 .m(1)
21950 .n(4)
21951 .k(8)
21952 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080021953 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021954 }
21955
21956 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8_subtile) {
21957 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080021958 for (uint32_t n = 1; n <= 4; n++) {
21959 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070021960 GemmMicrokernelTester()
21961 .mr(1)
21962 .nr(4)
21963 .kr(8)
21964 .sr(1)
21965 .m(m)
21966 .n(n)
21967 .k(8)
21968 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021969 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021970 }
21971 }
21972 }
21973
21974 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8_subtile_m) {
21975 TEST_REQUIRES_X86_SSE41;
21976 for (uint32_t m = 1; m <= 1; m++) {
21977 GemmMicrokernelTester()
21978 .mr(1)
21979 .nr(4)
21980 .kr(8)
21981 .sr(1)
21982 .m(m)
21983 .n(4)
21984 .k(8)
21985 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021986 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070021987 }
21988 }
21989
21990 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8_subtile_n) {
21991 TEST_REQUIRES_X86_SSE41;
21992 for (uint32_t n = 1; n <= 4; n++) {
21993 GemmMicrokernelTester()
21994 .mr(1)
21995 .nr(4)
21996 .kr(8)
21997 .sr(1)
21998 .m(1)
21999 .n(n)
22000 .k(8)
22001 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022002 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022003 }
22004 }
22005
22006 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_lt_8) {
22007 TEST_REQUIRES_X86_SSE41;
22008 for (size_t k = 1; k < 8; k++) {
22009 GemmMicrokernelTester()
22010 .mr(1)
22011 .nr(4)
22012 .kr(8)
22013 .sr(1)
22014 .m(1)
22015 .n(4)
22016 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022017 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022018 }
22019 }
22020
22021 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_lt_8_strided_a) {
22022 TEST_REQUIRES_X86_SSE41;
22023 for (size_t k = 1; k < 8; k++) {
22024 GemmMicrokernelTester()
22025 .mr(1)
22026 .nr(4)
22027 .kr(8)
22028 .sr(1)
22029 .m(1)
22030 .n(4)
22031 .k(k)
22032 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080022033 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022034 }
22035 }
22036
22037 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_lt_8_subtile) {
22038 TEST_REQUIRES_X86_SSE41;
22039 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022040 for (uint32_t n = 1; n <= 4; n++) {
22041 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022042 GemmMicrokernelTester()
22043 .mr(1)
22044 .nr(4)
22045 .kr(8)
22046 .sr(1)
22047 .m(m)
22048 .n(n)
22049 .k(k)
22050 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022051 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022052 }
22053 }
22054 }
22055 }
22056
22057 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_gt_8) {
22058 TEST_REQUIRES_X86_SSE41;
22059 for (size_t k = 9; k < 16; k++) {
22060 GemmMicrokernelTester()
22061 .mr(1)
22062 .nr(4)
22063 .kr(8)
22064 .sr(1)
22065 .m(1)
22066 .n(4)
22067 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022068 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022069 }
22070 }
22071
22072 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_gt_8_strided_a) {
22073 TEST_REQUIRES_X86_SSE41;
22074 for (size_t k = 9; k < 16; k++) {
22075 GemmMicrokernelTester()
22076 .mr(1)
22077 .nr(4)
22078 .kr(8)
22079 .sr(1)
22080 .m(1)
22081 .n(4)
22082 .k(k)
22083 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080022084 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022085 }
22086 }
22087
22088 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_gt_8_subtile) {
22089 TEST_REQUIRES_X86_SSE41;
22090 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022091 for (uint32_t n = 1; n <= 4; n++) {
22092 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022093 GemmMicrokernelTester()
22094 .mr(1)
22095 .nr(4)
22096 .kr(8)
22097 .sr(1)
22098 .m(m)
22099 .n(n)
22100 .k(k)
22101 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022102 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022103 }
22104 }
22105 }
22106 }
22107
22108 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_div_8) {
22109 TEST_REQUIRES_X86_SSE41;
22110 for (size_t k = 16; k <= 80; k += 8) {
22111 GemmMicrokernelTester()
22112 .mr(1)
22113 .nr(4)
22114 .kr(8)
22115 .sr(1)
22116 .m(1)
22117 .n(4)
22118 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022119 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022120 }
22121 }
22122
22123 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_div_8_strided_a) {
22124 TEST_REQUIRES_X86_SSE41;
22125 for (size_t k = 16; k <= 80; k += 8) {
22126 GemmMicrokernelTester()
22127 .mr(1)
22128 .nr(4)
22129 .kr(8)
22130 .sr(1)
22131 .m(1)
22132 .n(4)
22133 .k(k)
22134 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080022135 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022136 }
22137 }
22138
22139 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_div_8_subtile) {
22140 TEST_REQUIRES_X86_SSE41;
22141 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022142 for (uint32_t n = 1; n <= 4; n++) {
22143 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022144 GemmMicrokernelTester()
22145 .mr(1)
22146 .nr(4)
22147 .kr(8)
22148 .sr(1)
22149 .m(m)
22150 .n(n)
22151 .k(k)
22152 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022153 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022154 }
22155 }
22156 }
22157 }
22158
22159 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4) {
22160 TEST_REQUIRES_X86_SSE41;
22161 for (uint32_t n = 5; n < 8; n++) {
22162 for (size_t k = 1; k <= 40; k += 9) {
22163 GemmMicrokernelTester()
22164 .mr(1)
22165 .nr(4)
22166 .kr(8)
22167 .sr(1)
22168 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022169 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070022170 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022171 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022172 }
22173 }
22174 }
22175
22176 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4_strided_cn) {
22177 TEST_REQUIRES_X86_SSE41;
22178 for (uint32_t n = 5; n < 8; n++) {
22179 for (size_t k = 1; k <= 40; k += 9) {
22180 GemmMicrokernelTester()
22181 .mr(1)
22182 .nr(4)
22183 .kr(8)
22184 .sr(1)
22185 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022186 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070022187 .k(k)
22188 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022189 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022190 }
22191 }
22192 }
22193
22194 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4_strided_a) {
22195 TEST_REQUIRES_X86_SSE41;
22196 for (uint32_t n = 5; n < 8; n++) {
22197 for (size_t k = 1; k <= 40; k += 9) {
22198 GemmMicrokernelTester()
22199 .mr(1)
22200 .nr(4)
22201 .kr(8)
22202 .sr(1)
22203 .m(1)
22204 .n(n)
22205 .k(k)
22206 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080022207 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022208 }
22209 }
22210 }
22211
22212 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4_subtile) {
22213 TEST_REQUIRES_X86_SSE41;
22214 for (uint32_t n = 5; n < 8; n++) {
22215 for (size_t k = 1; k <= 40; k += 9) {
22216 for (uint32_t m = 1; m <= 1; m++) {
22217 GemmMicrokernelTester()
22218 .mr(1)
22219 .nr(4)
22220 .kr(8)
22221 .sr(1)
22222 .m(m)
22223 .n(n)
22224 .k(k)
22225 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022226 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022227 }
22228 }
22229 }
22230 }
22231
22232 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4) {
22233 TEST_REQUIRES_X86_SSE41;
22234 for (uint32_t n = 8; n <= 12; n += 4) {
22235 for (size_t k = 1; k <= 40; k += 9) {
22236 GemmMicrokernelTester()
22237 .mr(1)
22238 .nr(4)
22239 .kr(8)
22240 .sr(1)
22241 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022242 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070022243 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022244 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022245 }
22246 }
22247 }
22248
22249 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4_strided_cn) {
22250 TEST_REQUIRES_X86_SSE41;
22251 for (uint32_t n = 8; n <= 12; n += 4) {
22252 for (size_t k = 1; k <= 40; k += 9) {
22253 GemmMicrokernelTester()
22254 .mr(1)
22255 .nr(4)
22256 .kr(8)
22257 .sr(1)
22258 .m(1)
22259 .n(n)
22260 .k(k)
22261 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022262 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022263 }
22264 }
22265 }
22266
22267 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4_strided_a) {
22268 TEST_REQUIRES_X86_SSE41;
22269 for (uint32_t n = 8; n <= 12; n += 4) {
22270 for (size_t k = 1; k <= 40; k += 9) {
22271 GemmMicrokernelTester()
22272 .mr(1)
22273 .nr(4)
22274 .kr(8)
22275 .sr(1)
22276 .m(1)
22277 .n(n)
22278 .k(k)
22279 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080022280 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022281 }
22282 }
22283 }
22284
22285 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4_subtile) {
22286 TEST_REQUIRES_X86_SSE41;
22287 for (uint32_t n = 8; n <= 12; n += 4) {
22288 for (size_t k = 1; k <= 40; k += 9) {
22289 for (uint32_t m = 1; m <= 1; m++) {
22290 GemmMicrokernelTester()
22291 .mr(1)
22292 .nr(4)
22293 .kr(8)
22294 .sr(1)
22295 .m(m)
22296 .n(n)
22297 .k(k)
22298 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022299 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022300 }
22301 }
22302 }
22303 }
22304
22305 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, strided_cm_subtile) {
22306 TEST_REQUIRES_X86_SSE41;
22307 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022308 for (uint32_t n = 1; n <= 4; n++) {
22309 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022310 GemmMicrokernelTester()
22311 .mr(1)
22312 .nr(4)
22313 .kr(8)
22314 .sr(1)
22315 .m(m)
22316 .n(n)
22317 .k(k)
22318 .cm_stride(7)
22319 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022320 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022321 }
22322 }
22323 }
22324 }
22325
22326 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, qmin) {
22327 TEST_REQUIRES_X86_SSE41;
22328 GemmMicrokernelTester()
22329 .mr(1)
22330 .nr(4)
22331 .kr(8)
22332 .sr(1)
22333 .m(1)
22334 .n(4)
22335 .k(8)
22336 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022337 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022338 }
22339
22340 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, qmax) {
22341 TEST_REQUIRES_X86_SSE41;
22342 GemmMicrokernelTester()
22343 .mr(1)
22344 .nr(4)
22345 .kr(8)
22346 .sr(1)
22347 .m(1)
22348 .n(4)
22349 .k(8)
22350 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022351 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022352 }
22353
22354 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD64, strided_cm) {
22355 TEST_REQUIRES_X86_SSE41;
22356 GemmMicrokernelTester()
22357 .mr(1)
22358 .nr(4)
22359 .kr(8)
22360 .sr(1)
22361 .m(1)
22362 .n(4)
22363 .k(8)
22364 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022365 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022366 }
22367#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22368
22369
22370#if XNN_ARCH_X86 || XNN_ARCH_X86_64
22371 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8) {
22372 TEST_REQUIRES_X86_SSE41;
22373 GemmMicrokernelTester()
22374 .mr(2)
22375 .nr(4)
22376 .kr(8)
22377 .sr(1)
22378 .m(2)
22379 .n(4)
22380 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080022381 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022382 }
22383
22384 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, strided_cn) {
22385 TEST_REQUIRES_X86_SSE41;
22386 GemmMicrokernelTester()
22387 .mr(2)
22388 .nr(4)
22389 .kr(8)
22390 .sr(1)
22391 .m(2)
22392 .n(4)
22393 .k(8)
22394 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022395 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022396 }
22397
22398 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_strided_a) {
22399 TEST_REQUIRES_X86_SSE41;
22400 GemmMicrokernelTester()
22401 .mr(2)
22402 .nr(4)
22403 .kr(8)
22404 .sr(1)
22405 .m(2)
22406 .n(4)
22407 .k(8)
22408 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080022409 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022410 }
22411
22412 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_subtile) {
22413 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080022414 for (uint32_t n = 1; n <= 4; n++) {
22415 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022416 GemmMicrokernelTester()
22417 .mr(2)
22418 .nr(4)
22419 .kr(8)
22420 .sr(1)
22421 .m(m)
22422 .n(n)
22423 .k(8)
22424 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022425 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022426 }
22427 }
22428 }
22429
22430 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_subtile_m) {
22431 TEST_REQUIRES_X86_SSE41;
22432 for (uint32_t m = 1; m <= 2; m++) {
22433 GemmMicrokernelTester()
22434 .mr(2)
22435 .nr(4)
22436 .kr(8)
22437 .sr(1)
22438 .m(m)
22439 .n(4)
22440 .k(8)
22441 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022442 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022443 }
22444 }
22445
22446 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_subtile_n) {
22447 TEST_REQUIRES_X86_SSE41;
22448 for (uint32_t n = 1; n <= 4; n++) {
22449 GemmMicrokernelTester()
22450 .mr(2)
22451 .nr(4)
22452 .kr(8)
22453 .sr(1)
22454 .m(2)
22455 .n(n)
22456 .k(8)
22457 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022458 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022459 }
22460 }
22461
22462 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_lt_8) {
22463 TEST_REQUIRES_X86_SSE41;
22464 for (size_t k = 1; k < 8; k++) {
22465 GemmMicrokernelTester()
22466 .mr(2)
22467 .nr(4)
22468 .kr(8)
22469 .sr(1)
22470 .m(2)
22471 .n(4)
22472 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022473 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022474 }
22475 }
22476
22477 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_lt_8_strided_a) {
22478 TEST_REQUIRES_X86_SSE41;
22479 for (size_t k = 1; k < 8; k++) {
22480 GemmMicrokernelTester()
22481 .mr(2)
22482 .nr(4)
22483 .kr(8)
22484 .sr(1)
22485 .m(2)
22486 .n(4)
22487 .k(k)
22488 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080022489 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022490 }
22491 }
22492
22493 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_lt_8_subtile) {
22494 TEST_REQUIRES_X86_SSE41;
22495 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022496 for (uint32_t n = 1; n <= 4; n++) {
22497 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022498 GemmMicrokernelTester()
22499 .mr(2)
22500 .nr(4)
22501 .kr(8)
22502 .sr(1)
22503 .m(m)
22504 .n(n)
22505 .k(k)
22506 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022507 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022508 }
22509 }
22510 }
22511 }
22512
22513 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_gt_8) {
22514 TEST_REQUIRES_X86_SSE41;
22515 for (size_t k = 9; k < 16; k++) {
22516 GemmMicrokernelTester()
22517 .mr(2)
22518 .nr(4)
22519 .kr(8)
22520 .sr(1)
22521 .m(2)
22522 .n(4)
22523 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022524 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022525 }
22526 }
22527
22528 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_gt_8_strided_a) {
22529 TEST_REQUIRES_X86_SSE41;
22530 for (size_t k = 9; k < 16; k++) {
22531 GemmMicrokernelTester()
22532 .mr(2)
22533 .nr(4)
22534 .kr(8)
22535 .sr(1)
22536 .m(2)
22537 .n(4)
22538 .k(k)
22539 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080022540 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022541 }
22542 }
22543
22544 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_gt_8_subtile) {
22545 TEST_REQUIRES_X86_SSE41;
22546 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022547 for (uint32_t n = 1; n <= 4; n++) {
22548 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022549 GemmMicrokernelTester()
22550 .mr(2)
22551 .nr(4)
22552 .kr(8)
22553 .sr(1)
22554 .m(m)
22555 .n(n)
22556 .k(k)
22557 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022558 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022559 }
22560 }
22561 }
22562 }
22563
22564 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_div_8) {
22565 TEST_REQUIRES_X86_SSE41;
22566 for (size_t k = 16; k <= 80; k += 8) {
22567 GemmMicrokernelTester()
22568 .mr(2)
22569 .nr(4)
22570 .kr(8)
22571 .sr(1)
22572 .m(2)
22573 .n(4)
22574 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022575 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022576 }
22577 }
22578
22579 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_div_8_strided_a) {
22580 TEST_REQUIRES_X86_SSE41;
22581 for (size_t k = 16; k <= 80; k += 8) {
22582 GemmMicrokernelTester()
22583 .mr(2)
22584 .nr(4)
22585 .kr(8)
22586 .sr(1)
22587 .m(2)
22588 .n(4)
22589 .k(k)
22590 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080022591 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022592 }
22593 }
22594
22595 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_div_8_subtile) {
22596 TEST_REQUIRES_X86_SSE41;
22597 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022598 for (uint32_t n = 1; n <= 4; n++) {
22599 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022600 GemmMicrokernelTester()
22601 .mr(2)
22602 .nr(4)
22603 .kr(8)
22604 .sr(1)
22605 .m(m)
22606 .n(n)
22607 .k(k)
22608 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022609 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022610 }
22611 }
22612 }
22613 }
22614
22615 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4) {
22616 TEST_REQUIRES_X86_SSE41;
22617 for (uint32_t n = 5; n < 8; n++) {
22618 for (size_t k = 1; k <= 40; k += 9) {
22619 GemmMicrokernelTester()
22620 .mr(2)
22621 .nr(4)
22622 .kr(8)
22623 .sr(1)
22624 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022625 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070022626 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022627 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022628 }
22629 }
22630 }
22631
22632 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4_strided_cn) {
22633 TEST_REQUIRES_X86_SSE41;
22634 for (uint32_t n = 5; n < 8; n++) {
22635 for (size_t k = 1; k <= 40; k += 9) {
22636 GemmMicrokernelTester()
22637 .mr(2)
22638 .nr(4)
22639 .kr(8)
22640 .sr(1)
22641 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022642 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070022643 .k(k)
22644 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022645 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022646 }
22647 }
22648 }
22649
22650 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4_strided_a) {
22651 TEST_REQUIRES_X86_SSE41;
22652 for (uint32_t n = 5; n < 8; n++) {
22653 for (size_t k = 1; k <= 40; k += 9) {
22654 GemmMicrokernelTester()
22655 .mr(2)
22656 .nr(4)
22657 .kr(8)
22658 .sr(1)
22659 .m(2)
22660 .n(n)
22661 .k(k)
22662 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080022663 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022664 }
22665 }
22666 }
22667
22668 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4_subtile) {
22669 TEST_REQUIRES_X86_SSE41;
22670 for (uint32_t n = 5; n < 8; n++) {
22671 for (size_t k = 1; k <= 40; k += 9) {
22672 for (uint32_t m = 1; m <= 2; m++) {
22673 GemmMicrokernelTester()
22674 .mr(2)
22675 .nr(4)
22676 .kr(8)
22677 .sr(1)
22678 .m(m)
22679 .n(n)
22680 .k(k)
22681 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022682 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022683 }
22684 }
22685 }
22686 }
22687
22688 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4) {
22689 TEST_REQUIRES_X86_SSE41;
22690 for (uint32_t n = 8; n <= 12; n += 4) {
22691 for (size_t k = 1; k <= 40; k += 9) {
22692 GemmMicrokernelTester()
22693 .mr(2)
22694 .nr(4)
22695 .kr(8)
22696 .sr(1)
22697 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022698 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070022699 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022700 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022701 }
22702 }
22703 }
22704
22705 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4_strided_cn) {
22706 TEST_REQUIRES_X86_SSE41;
22707 for (uint32_t n = 8; n <= 12; n += 4) {
22708 for (size_t k = 1; k <= 40; k += 9) {
22709 GemmMicrokernelTester()
22710 .mr(2)
22711 .nr(4)
22712 .kr(8)
22713 .sr(1)
22714 .m(2)
22715 .n(n)
22716 .k(k)
22717 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022718 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022719 }
22720 }
22721 }
22722
22723 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4_strided_a) {
22724 TEST_REQUIRES_X86_SSE41;
22725 for (uint32_t n = 8; n <= 12; n += 4) {
22726 for (size_t k = 1; k <= 40; k += 9) {
22727 GemmMicrokernelTester()
22728 .mr(2)
22729 .nr(4)
22730 .kr(8)
22731 .sr(1)
22732 .m(2)
22733 .n(n)
22734 .k(k)
22735 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080022736 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022737 }
22738 }
22739 }
22740
22741 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4_subtile) {
22742 TEST_REQUIRES_X86_SSE41;
22743 for (uint32_t n = 8; n <= 12; n += 4) {
22744 for (size_t k = 1; k <= 40; k += 9) {
22745 for (uint32_t m = 1; m <= 2; m++) {
22746 GemmMicrokernelTester()
22747 .mr(2)
22748 .nr(4)
22749 .kr(8)
22750 .sr(1)
22751 .m(m)
22752 .n(n)
22753 .k(k)
22754 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022755 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022756 }
22757 }
22758 }
22759 }
22760
22761 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, strided_cm_subtile) {
22762 TEST_REQUIRES_X86_SSE41;
22763 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022764 for (uint32_t n = 1; n <= 4; n++) {
22765 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022766 GemmMicrokernelTester()
22767 .mr(2)
22768 .nr(4)
22769 .kr(8)
22770 .sr(1)
22771 .m(m)
22772 .n(n)
22773 .k(k)
22774 .cm_stride(7)
22775 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022776 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022777 }
22778 }
22779 }
22780 }
22781
22782 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, qmin) {
22783 TEST_REQUIRES_X86_SSE41;
22784 GemmMicrokernelTester()
22785 .mr(2)
22786 .nr(4)
22787 .kr(8)
22788 .sr(1)
22789 .m(2)
22790 .n(4)
22791 .k(8)
22792 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022793 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022794 }
22795
22796 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, qmax) {
22797 TEST_REQUIRES_X86_SSE41;
22798 GemmMicrokernelTester()
22799 .mr(2)
22800 .nr(4)
22801 .kr(8)
22802 .sr(1)
22803 .m(2)
22804 .n(4)
22805 .k(8)
22806 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022807 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022808 }
22809
22810 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, strided_cm) {
22811 TEST_REQUIRES_X86_SSE41;
22812 GemmMicrokernelTester()
22813 .mr(2)
22814 .nr(4)
22815 .kr(8)
22816 .sr(1)
22817 .m(2)
22818 .n(4)
22819 .k(8)
22820 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022821 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022822 }
22823#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22824
22825
22826#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanc46e6712021-06-01 19:00:16 -070022827 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8) {
22828 TEST_REQUIRES_X86_AVX;
22829 GemmMicrokernelTester()
22830 .mr(2)
22831 .nr(4)
22832 .kr(8)
22833 .sr(1)
22834 .m(2)
22835 .n(4)
22836 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080022837 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022838 }
22839
22840 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cn) {
22841 TEST_REQUIRES_X86_AVX;
22842 GemmMicrokernelTester()
22843 .mr(2)
22844 .nr(4)
22845 .kr(8)
22846 .sr(1)
22847 .m(2)
22848 .n(4)
22849 .k(8)
22850 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022851 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022852 }
22853
22854 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_strided_a) {
22855 TEST_REQUIRES_X86_AVX;
22856 GemmMicrokernelTester()
22857 .mr(2)
22858 .nr(4)
22859 .kr(8)
22860 .sr(1)
22861 .m(2)
22862 .n(4)
22863 .k(8)
22864 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080022865 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022866 }
22867
22868 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile) {
22869 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080022870 for (uint32_t n = 1; n <= 4; n++) {
22871 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022872 GemmMicrokernelTester()
22873 .mr(2)
22874 .nr(4)
22875 .kr(8)
22876 .sr(1)
22877 .m(m)
22878 .n(n)
22879 .k(8)
22880 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022881 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022882 }
22883 }
22884 }
22885
22886 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile_m) {
22887 TEST_REQUIRES_X86_AVX;
22888 for (uint32_t m = 1; m <= 2; m++) {
22889 GemmMicrokernelTester()
22890 .mr(2)
22891 .nr(4)
22892 .kr(8)
22893 .sr(1)
22894 .m(m)
22895 .n(4)
22896 .k(8)
22897 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022898 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022899 }
22900 }
22901
22902 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile_n) {
22903 TEST_REQUIRES_X86_AVX;
22904 for (uint32_t n = 1; n <= 4; n++) {
22905 GemmMicrokernelTester()
22906 .mr(2)
22907 .nr(4)
22908 .kr(8)
22909 .sr(1)
22910 .m(2)
22911 .n(n)
22912 .k(8)
22913 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022914 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022915 }
22916 }
22917
22918 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8) {
22919 TEST_REQUIRES_X86_AVX;
22920 for (size_t k = 1; k < 8; k++) {
22921 GemmMicrokernelTester()
22922 .mr(2)
22923 .nr(4)
22924 .kr(8)
22925 .sr(1)
22926 .m(2)
22927 .n(4)
22928 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022929 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022930 }
22931 }
22932
22933 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8_strided_a) {
22934 TEST_REQUIRES_X86_AVX;
22935 for (size_t k = 1; k < 8; k++) {
22936 GemmMicrokernelTester()
22937 .mr(2)
22938 .nr(4)
22939 .kr(8)
22940 .sr(1)
22941 .m(2)
22942 .n(4)
22943 .k(k)
22944 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080022945 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022946 }
22947 }
22948
22949 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8_subtile) {
22950 TEST_REQUIRES_X86_AVX;
22951 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022952 for (uint32_t n = 1; n <= 4; n++) {
22953 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070022954 GemmMicrokernelTester()
22955 .mr(2)
22956 .nr(4)
22957 .kr(8)
22958 .sr(1)
22959 .m(m)
22960 .n(n)
22961 .k(k)
22962 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022963 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022964 }
22965 }
22966 }
22967 }
22968
22969 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8) {
22970 TEST_REQUIRES_X86_AVX;
22971 for (size_t k = 9; k < 16; k++) {
22972 GemmMicrokernelTester()
22973 .mr(2)
22974 .nr(4)
22975 .kr(8)
22976 .sr(1)
22977 .m(2)
22978 .n(4)
22979 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080022980 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022981 }
22982 }
22983
22984 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8_strided_a) {
22985 TEST_REQUIRES_X86_AVX;
22986 for (size_t k = 9; k < 16; k++) {
22987 GemmMicrokernelTester()
22988 .mr(2)
22989 .nr(4)
22990 .kr(8)
22991 .sr(1)
22992 .m(2)
22993 .n(4)
22994 .k(k)
22995 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080022996 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070022997 }
22998 }
22999
23000 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8_subtile) {
23001 TEST_REQUIRES_X86_AVX;
23002 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023003 for (uint32_t n = 1; n <= 4; n++) {
23004 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023005 GemmMicrokernelTester()
23006 .mr(2)
23007 .nr(4)
23008 .kr(8)
23009 .sr(1)
23010 .m(m)
23011 .n(n)
23012 .k(k)
23013 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023014 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023015 }
23016 }
23017 }
23018 }
23019
23020 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8) {
23021 TEST_REQUIRES_X86_AVX;
23022 for (size_t k = 16; k <= 80; k += 8) {
23023 GemmMicrokernelTester()
23024 .mr(2)
23025 .nr(4)
23026 .kr(8)
23027 .sr(1)
23028 .m(2)
23029 .n(4)
23030 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023031 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023032 }
23033 }
23034
23035 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8_strided_a) {
23036 TEST_REQUIRES_X86_AVX;
23037 for (size_t k = 16; k <= 80; k += 8) {
23038 GemmMicrokernelTester()
23039 .mr(2)
23040 .nr(4)
23041 .kr(8)
23042 .sr(1)
23043 .m(2)
23044 .n(4)
23045 .k(k)
23046 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080023047 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023048 }
23049 }
23050
23051 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8_subtile) {
23052 TEST_REQUIRES_X86_AVX;
23053 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023054 for (uint32_t n = 1; n <= 4; n++) {
23055 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023056 GemmMicrokernelTester()
23057 .mr(2)
23058 .nr(4)
23059 .kr(8)
23060 .sr(1)
23061 .m(m)
23062 .n(n)
23063 .k(k)
23064 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023065 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023066 }
23067 }
23068 }
23069 }
23070
23071 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4) {
23072 TEST_REQUIRES_X86_AVX;
23073 for (uint32_t n = 5; n < 8; n++) {
23074 for (size_t k = 1; k <= 40; k += 9) {
23075 GemmMicrokernelTester()
23076 .mr(2)
23077 .nr(4)
23078 .kr(8)
23079 .sr(1)
23080 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023081 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070023082 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023083 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023084 }
23085 }
23086 }
23087
23088 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_strided_cn) {
23089 TEST_REQUIRES_X86_AVX;
23090 for (uint32_t n = 5; n < 8; n++) {
23091 for (size_t k = 1; k <= 40; k += 9) {
23092 GemmMicrokernelTester()
23093 .mr(2)
23094 .nr(4)
23095 .kr(8)
23096 .sr(1)
23097 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023098 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070023099 .k(k)
23100 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080023101 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023102 }
23103 }
23104 }
23105
23106 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_strided_a) {
23107 TEST_REQUIRES_X86_AVX;
23108 for (uint32_t n = 5; n < 8; n++) {
23109 for (size_t k = 1; k <= 40; k += 9) {
23110 GemmMicrokernelTester()
23111 .mr(2)
23112 .nr(4)
23113 .kr(8)
23114 .sr(1)
23115 .m(2)
23116 .n(n)
23117 .k(k)
23118 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080023119 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023120 }
23121 }
23122 }
23123
23124 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_subtile) {
23125 TEST_REQUIRES_X86_AVX;
23126 for (uint32_t n = 5; n < 8; n++) {
23127 for (size_t k = 1; k <= 40; k += 9) {
23128 for (uint32_t m = 1; m <= 2; m++) {
23129 GemmMicrokernelTester()
23130 .mr(2)
23131 .nr(4)
23132 .kr(8)
23133 .sr(1)
23134 .m(m)
23135 .n(n)
23136 .k(k)
23137 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023138 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023139 }
23140 }
23141 }
23142 }
23143
23144 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4) {
23145 TEST_REQUIRES_X86_AVX;
23146 for (uint32_t n = 8; n <= 12; n += 4) {
23147 for (size_t k = 1; k <= 40; k += 9) {
23148 GemmMicrokernelTester()
23149 .mr(2)
23150 .nr(4)
23151 .kr(8)
23152 .sr(1)
23153 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023154 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070023155 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023156 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023157 }
23158 }
23159 }
23160
23161 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_strided_cn) {
23162 TEST_REQUIRES_X86_AVX;
23163 for (uint32_t n = 8; n <= 12; n += 4) {
23164 for (size_t k = 1; k <= 40; k += 9) {
23165 GemmMicrokernelTester()
23166 .mr(2)
23167 .nr(4)
23168 .kr(8)
23169 .sr(1)
23170 .m(2)
23171 .n(n)
23172 .k(k)
23173 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080023174 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023175 }
23176 }
23177 }
23178
23179 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_strided_a) {
23180 TEST_REQUIRES_X86_AVX;
23181 for (uint32_t n = 8; n <= 12; n += 4) {
23182 for (size_t k = 1; k <= 40; k += 9) {
23183 GemmMicrokernelTester()
23184 .mr(2)
23185 .nr(4)
23186 .kr(8)
23187 .sr(1)
23188 .m(2)
23189 .n(n)
23190 .k(k)
23191 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080023192 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023193 }
23194 }
23195 }
23196
23197 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_subtile) {
23198 TEST_REQUIRES_X86_AVX;
23199 for (uint32_t n = 8; n <= 12; n += 4) {
23200 for (size_t k = 1; k <= 40; k += 9) {
23201 for (uint32_t m = 1; m <= 2; m++) {
23202 GemmMicrokernelTester()
23203 .mr(2)
23204 .nr(4)
23205 .kr(8)
23206 .sr(1)
23207 .m(m)
23208 .n(n)
23209 .k(k)
23210 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023211 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023212 }
23213 }
23214 }
23215 }
23216
23217 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cm_subtile) {
23218 TEST_REQUIRES_X86_AVX;
23219 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023220 for (uint32_t n = 1; n <= 4; n++) {
23221 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023222 GemmMicrokernelTester()
23223 .mr(2)
23224 .nr(4)
23225 .kr(8)
23226 .sr(1)
23227 .m(m)
23228 .n(n)
23229 .k(k)
23230 .cm_stride(7)
23231 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023232 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023233 }
23234 }
23235 }
23236 }
23237
23238 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, qmin) {
23239 TEST_REQUIRES_X86_AVX;
23240 GemmMicrokernelTester()
23241 .mr(2)
23242 .nr(4)
23243 .kr(8)
23244 .sr(1)
23245 .m(2)
23246 .n(4)
23247 .k(8)
23248 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023249 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023250 }
23251
23252 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, qmax) {
23253 TEST_REQUIRES_X86_AVX;
23254 GemmMicrokernelTester()
23255 .mr(2)
23256 .nr(4)
23257 .kr(8)
23258 .sr(1)
23259 .m(2)
23260 .n(4)
23261 .k(8)
23262 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023263 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023264 }
23265
23266 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cm) {
23267 TEST_REQUIRES_X86_AVX;
23268 GemmMicrokernelTester()
23269 .mr(2)
23270 .nr(4)
23271 .kr(8)
23272 .sr(1)
23273 .m(2)
23274 .n(4)
23275 .k(8)
23276 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080023277 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023278 }
23279#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23280
23281
23282#if XNN_ARCH_X86 || XNN_ARCH_X86_64
23283 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8) {
23284 TEST_REQUIRES_X86_AVX;
23285 GemmMicrokernelTester()
23286 .mr(3)
23287 .nr(4)
23288 .kr(8)
23289 .sr(1)
23290 .m(3)
23291 .n(4)
23292 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080023293 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023294 }
23295
23296 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cn) {
23297 TEST_REQUIRES_X86_AVX;
23298 GemmMicrokernelTester()
23299 .mr(3)
23300 .nr(4)
23301 .kr(8)
23302 .sr(1)
23303 .m(3)
23304 .n(4)
23305 .k(8)
23306 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080023307 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023308 }
23309
23310 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_strided_a) {
23311 TEST_REQUIRES_X86_AVX;
23312 GemmMicrokernelTester()
23313 .mr(3)
23314 .nr(4)
23315 .kr(8)
23316 .sr(1)
23317 .m(3)
23318 .n(4)
23319 .k(8)
23320 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080023321 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023322 }
23323
23324 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile) {
23325 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080023326 for (uint32_t n = 1; n <= 4; n++) {
23327 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023328 GemmMicrokernelTester()
23329 .mr(3)
23330 .nr(4)
23331 .kr(8)
23332 .sr(1)
23333 .m(m)
23334 .n(n)
23335 .k(8)
23336 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023337 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023338 }
23339 }
23340 }
23341
23342 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile_m) {
23343 TEST_REQUIRES_X86_AVX;
23344 for (uint32_t m = 1; m <= 3; m++) {
23345 GemmMicrokernelTester()
23346 .mr(3)
23347 .nr(4)
23348 .kr(8)
23349 .sr(1)
23350 .m(m)
23351 .n(4)
23352 .k(8)
23353 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023354 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023355 }
23356 }
23357
23358 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile_n) {
23359 TEST_REQUIRES_X86_AVX;
23360 for (uint32_t n = 1; n <= 4; n++) {
23361 GemmMicrokernelTester()
23362 .mr(3)
23363 .nr(4)
23364 .kr(8)
23365 .sr(1)
23366 .m(3)
23367 .n(n)
23368 .k(8)
23369 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023370 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023371 }
23372 }
23373
23374 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8) {
23375 TEST_REQUIRES_X86_AVX;
23376 for (size_t k = 1; k < 8; k++) {
23377 GemmMicrokernelTester()
23378 .mr(3)
23379 .nr(4)
23380 .kr(8)
23381 .sr(1)
23382 .m(3)
23383 .n(4)
23384 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023385 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023386 }
23387 }
23388
23389 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8_strided_a) {
23390 TEST_REQUIRES_X86_AVX;
23391 for (size_t k = 1; k < 8; k++) {
23392 GemmMicrokernelTester()
23393 .mr(3)
23394 .nr(4)
23395 .kr(8)
23396 .sr(1)
23397 .m(3)
23398 .n(4)
23399 .k(k)
23400 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080023401 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023402 }
23403 }
23404
23405 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8_subtile) {
23406 TEST_REQUIRES_X86_AVX;
23407 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023408 for (uint32_t n = 1; n <= 4; n++) {
23409 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023410 GemmMicrokernelTester()
23411 .mr(3)
23412 .nr(4)
23413 .kr(8)
23414 .sr(1)
23415 .m(m)
23416 .n(n)
23417 .k(k)
23418 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023419 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023420 }
23421 }
23422 }
23423 }
23424
23425 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8) {
23426 TEST_REQUIRES_X86_AVX;
23427 for (size_t k = 9; k < 16; k++) {
23428 GemmMicrokernelTester()
23429 .mr(3)
23430 .nr(4)
23431 .kr(8)
23432 .sr(1)
23433 .m(3)
23434 .n(4)
23435 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023436 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023437 }
23438 }
23439
23440 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8_strided_a) {
23441 TEST_REQUIRES_X86_AVX;
23442 for (size_t k = 9; k < 16; k++) {
23443 GemmMicrokernelTester()
23444 .mr(3)
23445 .nr(4)
23446 .kr(8)
23447 .sr(1)
23448 .m(3)
23449 .n(4)
23450 .k(k)
23451 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080023452 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023453 }
23454 }
23455
23456 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8_subtile) {
23457 TEST_REQUIRES_X86_AVX;
23458 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023459 for (uint32_t n = 1; n <= 4; n++) {
23460 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023461 GemmMicrokernelTester()
23462 .mr(3)
23463 .nr(4)
23464 .kr(8)
23465 .sr(1)
23466 .m(m)
23467 .n(n)
23468 .k(k)
23469 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023470 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023471 }
23472 }
23473 }
23474 }
23475
23476 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8) {
23477 TEST_REQUIRES_X86_AVX;
23478 for (size_t k = 16; k <= 80; k += 8) {
23479 GemmMicrokernelTester()
23480 .mr(3)
23481 .nr(4)
23482 .kr(8)
23483 .sr(1)
23484 .m(3)
23485 .n(4)
23486 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023487 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023488 }
23489 }
23490
23491 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8_strided_a) {
23492 TEST_REQUIRES_X86_AVX;
23493 for (size_t k = 16; k <= 80; k += 8) {
23494 GemmMicrokernelTester()
23495 .mr(3)
23496 .nr(4)
23497 .kr(8)
23498 .sr(1)
23499 .m(3)
23500 .n(4)
23501 .k(k)
23502 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080023503 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023504 }
23505 }
23506
23507 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8_subtile) {
23508 TEST_REQUIRES_X86_AVX;
23509 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023510 for (uint32_t n = 1; n <= 4; n++) {
23511 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023512 GemmMicrokernelTester()
23513 .mr(3)
23514 .nr(4)
23515 .kr(8)
23516 .sr(1)
23517 .m(m)
23518 .n(n)
23519 .k(k)
23520 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023521 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023522 }
23523 }
23524 }
23525 }
23526
23527 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4) {
23528 TEST_REQUIRES_X86_AVX;
23529 for (uint32_t n = 5; n < 8; n++) {
23530 for (size_t k = 1; k <= 40; k += 9) {
23531 GemmMicrokernelTester()
23532 .mr(3)
23533 .nr(4)
23534 .kr(8)
23535 .sr(1)
23536 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023537 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070023538 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023539 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023540 }
23541 }
23542 }
23543
23544 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_strided_cn) {
23545 TEST_REQUIRES_X86_AVX;
23546 for (uint32_t n = 5; n < 8; n++) {
23547 for (size_t k = 1; k <= 40; k += 9) {
23548 GemmMicrokernelTester()
23549 .mr(3)
23550 .nr(4)
23551 .kr(8)
23552 .sr(1)
23553 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023554 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070023555 .k(k)
23556 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080023557 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023558 }
23559 }
23560 }
23561
23562 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_strided_a) {
23563 TEST_REQUIRES_X86_AVX;
23564 for (uint32_t n = 5; n < 8; n++) {
23565 for (size_t k = 1; k <= 40; k += 9) {
23566 GemmMicrokernelTester()
23567 .mr(3)
23568 .nr(4)
23569 .kr(8)
23570 .sr(1)
23571 .m(3)
23572 .n(n)
23573 .k(k)
23574 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080023575 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023576 }
23577 }
23578 }
23579
23580 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_subtile) {
23581 TEST_REQUIRES_X86_AVX;
23582 for (uint32_t n = 5; n < 8; n++) {
23583 for (size_t k = 1; k <= 40; k += 9) {
23584 for (uint32_t m = 1; m <= 3; m++) {
23585 GemmMicrokernelTester()
23586 .mr(3)
23587 .nr(4)
23588 .kr(8)
23589 .sr(1)
23590 .m(m)
23591 .n(n)
23592 .k(k)
23593 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023594 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023595 }
23596 }
23597 }
23598 }
23599
23600 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4) {
23601 TEST_REQUIRES_X86_AVX;
23602 for (uint32_t n = 8; n <= 12; n += 4) {
23603 for (size_t k = 1; k <= 40; k += 9) {
23604 GemmMicrokernelTester()
23605 .mr(3)
23606 .nr(4)
23607 .kr(8)
23608 .sr(1)
23609 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023610 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070023611 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023612 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023613 }
23614 }
23615 }
23616
23617 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_strided_cn) {
23618 TEST_REQUIRES_X86_AVX;
23619 for (uint32_t n = 8; n <= 12; n += 4) {
23620 for (size_t k = 1; k <= 40; k += 9) {
23621 GemmMicrokernelTester()
23622 .mr(3)
23623 .nr(4)
23624 .kr(8)
23625 .sr(1)
23626 .m(3)
23627 .n(n)
23628 .k(k)
23629 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080023630 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023631 }
23632 }
23633 }
23634
23635 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_strided_a) {
23636 TEST_REQUIRES_X86_AVX;
23637 for (uint32_t n = 8; n <= 12; n += 4) {
23638 for (size_t k = 1; k <= 40; k += 9) {
23639 GemmMicrokernelTester()
23640 .mr(3)
23641 .nr(4)
23642 .kr(8)
23643 .sr(1)
23644 .m(3)
23645 .n(n)
23646 .k(k)
23647 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080023648 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023649 }
23650 }
23651 }
23652
23653 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_subtile) {
23654 TEST_REQUIRES_X86_AVX;
23655 for (uint32_t n = 8; n <= 12; n += 4) {
23656 for (size_t k = 1; k <= 40; k += 9) {
23657 for (uint32_t m = 1; m <= 3; m++) {
23658 GemmMicrokernelTester()
23659 .mr(3)
23660 .nr(4)
23661 .kr(8)
23662 .sr(1)
23663 .m(m)
23664 .n(n)
23665 .k(k)
23666 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023667 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023668 }
23669 }
23670 }
23671 }
23672
23673 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cm_subtile) {
23674 TEST_REQUIRES_X86_AVX;
23675 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023676 for (uint32_t n = 1; n <= 4; n++) {
23677 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023678 GemmMicrokernelTester()
23679 .mr(3)
23680 .nr(4)
23681 .kr(8)
23682 .sr(1)
23683 .m(m)
23684 .n(n)
23685 .k(k)
23686 .cm_stride(7)
23687 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023688 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023689 }
23690 }
23691 }
23692 }
23693
23694 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, qmin) {
23695 TEST_REQUIRES_X86_AVX;
23696 GemmMicrokernelTester()
23697 .mr(3)
23698 .nr(4)
23699 .kr(8)
23700 .sr(1)
23701 .m(3)
23702 .n(4)
23703 .k(8)
23704 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023705 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023706 }
23707
23708 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, qmax) {
23709 TEST_REQUIRES_X86_AVX;
23710 GemmMicrokernelTester()
23711 .mr(3)
23712 .nr(4)
23713 .kr(8)
23714 .sr(1)
23715 .m(3)
23716 .n(4)
23717 .k(8)
23718 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023719 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023720 }
23721
23722 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cm) {
23723 TEST_REQUIRES_X86_AVX;
23724 GemmMicrokernelTester()
23725 .mr(3)
23726 .nr(4)
23727 .kr(8)
23728 .sr(1)
23729 .m(3)
23730 .n(4)
23731 .k(8)
23732 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080023733 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023734 }
23735#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23736
23737
23738#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanc46e6712021-06-01 19:00:16 -070023739 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8) {
23740 TEST_REQUIRES_X86_XOP;
23741 GemmMicrokernelTester()
23742 .mr(2)
23743 .nr(4)
23744 .kr(8)
23745 .sr(1)
23746 .m(2)
23747 .n(4)
23748 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080023749 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023750 }
23751
23752 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cn) {
23753 TEST_REQUIRES_X86_XOP;
23754 GemmMicrokernelTester()
23755 .mr(2)
23756 .nr(4)
23757 .kr(8)
23758 .sr(1)
23759 .m(2)
23760 .n(4)
23761 .k(8)
23762 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080023763 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023764 }
23765
23766 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_strided_a) {
23767 TEST_REQUIRES_X86_XOP;
23768 GemmMicrokernelTester()
23769 .mr(2)
23770 .nr(4)
23771 .kr(8)
23772 .sr(1)
23773 .m(2)
23774 .n(4)
23775 .k(8)
23776 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080023777 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023778 }
23779
23780 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile) {
23781 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080023782 for (uint32_t n = 1; n <= 4; n++) {
23783 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023784 GemmMicrokernelTester()
23785 .mr(2)
23786 .nr(4)
23787 .kr(8)
23788 .sr(1)
23789 .m(m)
23790 .n(n)
23791 .k(8)
23792 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023793 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023794 }
23795 }
23796 }
23797
23798 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile_m) {
23799 TEST_REQUIRES_X86_XOP;
23800 for (uint32_t m = 1; m <= 2; m++) {
23801 GemmMicrokernelTester()
23802 .mr(2)
23803 .nr(4)
23804 .kr(8)
23805 .sr(1)
23806 .m(m)
23807 .n(4)
23808 .k(8)
23809 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023810 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023811 }
23812 }
23813
23814 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile_n) {
23815 TEST_REQUIRES_X86_XOP;
23816 for (uint32_t n = 1; n <= 4; n++) {
23817 GemmMicrokernelTester()
23818 .mr(2)
23819 .nr(4)
23820 .kr(8)
23821 .sr(1)
23822 .m(2)
23823 .n(n)
23824 .k(8)
23825 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023826 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023827 }
23828 }
23829
23830 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8) {
23831 TEST_REQUIRES_X86_XOP;
23832 for (size_t k = 1; k < 8; k++) {
23833 GemmMicrokernelTester()
23834 .mr(2)
23835 .nr(4)
23836 .kr(8)
23837 .sr(1)
23838 .m(2)
23839 .n(4)
23840 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023841 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023842 }
23843 }
23844
23845 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8_strided_a) {
23846 TEST_REQUIRES_X86_XOP;
23847 for (size_t k = 1; k < 8; k++) {
23848 GemmMicrokernelTester()
23849 .mr(2)
23850 .nr(4)
23851 .kr(8)
23852 .sr(1)
23853 .m(2)
23854 .n(4)
23855 .k(k)
23856 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080023857 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023858 }
23859 }
23860
23861 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8_subtile) {
23862 TEST_REQUIRES_X86_XOP;
23863 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023864 for (uint32_t n = 1; n <= 4; n++) {
23865 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023866 GemmMicrokernelTester()
23867 .mr(2)
23868 .nr(4)
23869 .kr(8)
23870 .sr(1)
23871 .m(m)
23872 .n(n)
23873 .k(k)
23874 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023875 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023876 }
23877 }
23878 }
23879 }
23880
23881 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8) {
23882 TEST_REQUIRES_X86_XOP;
23883 for (size_t k = 9; k < 16; k++) {
23884 GemmMicrokernelTester()
23885 .mr(2)
23886 .nr(4)
23887 .kr(8)
23888 .sr(1)
23889 .m(2)
23890 .n(4)
23891 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023892 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023893 }
23894 }
23895
23896 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8_strided_a) {
23897 TEST_REQUIRES_X86_XOP;
23898 for (size_t k = 9; k < 16; k++) {
23899 GemmMicrokernelTester()
23900 .mr(2)
23901 .nr(4)
23902 .kr(8)
23903 .sr(1)
23904 .m(2)
23905 .n(4)
23906 .k(k)
23907 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080023908 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023909 }
23910 }
23911
23912 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8_subtile) {
23913 TEST_REQUIRES_X86_XOP;
23914 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023915 for (uint32_t n = 1; n <= 4; n++) {
23916 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023917 GemmMicrokernelTester()
23918 .mr(2)
23919 .nr(4)
23920 .kr(8)
23921 .sr(1)
23922 .m(m)
23923 .n(n)
23924 .k(k)
23925 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023926 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023927 }
23928 }
23929 }
23930 }
23931
23932 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8) {
23933 TEST_REQUIRES_X86_XOP;
23934 for (size_t k = 16; k <= 80; k += 8) {
23935 GemmMicrokernelTester()
23936 .mr(2)
23937 .nr(4)
23938 .kr(8)
23939 .sr(1)
23940 .m(2)
23941 .n(4)
23942 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023943 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023944 }
23945 }
23946
23947 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8_strided_a) {
23948 TEST_REQUIRES_X86_XOP;
23949 for (size_t k = 16; k <= 80; k += 8) {
23950 GemmMicrokernelTester()
23951 .mr(2)
23952 .nr(4)
23953 .kr(8)
23954 .sr(1)
23955 .m(2)
23956 .n(4)
23957 .k(k)
23958 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080023959 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023960 }
23961 }
23962
23963 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8_subtile) {
23964 TEST_REQUIRES_X86_XOP;
23965 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023966 for (uint32_t n = 1; n <= 4; n++) {
23967 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070023968 GemmMicrokernelTester()
23969 .mr(2)
23970 .nr(4)
23971 .kr(8)
23972 .sr(1)
23973 .m(m)
23974 .n(n)
23975 .k(k)
23976 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023977 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023978 }
23979 }
23980 }
23981 }
23982
23983 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4) {
23984 TEST_REQUIRES_X86_XOP;
23985 for (uint32_t n = 5; n < 8; n++) {
23986 for (size_t k = 1; k <= 40; k += 9) {
23987 GemmMicrokernelTester()
23988 .mr(2)
23989 .nr(4)
23990 .kr(8)
23991 .sr(1)
23992 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023993 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070023994 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023995 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070023996 }
23997 }
23998 }
23999
24000 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_strided_cn) {
24001 TEST_REQUIRES_X86_XOP;
24002 for (uint32_t n = 5; n < 8; n++) {
24003 for (size_t k = 1; k <= 40; k += 9) {
24004 GemmMicrokernelTester()
24005 .mr(2)
24006 .nr(4)
24007 .kr(8)
24008 .sr(1)
24009 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024010 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070024011 .k(k)
24012 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080024013 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024014 }
24015 }
24016 }
24017
24018 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_strided_a) {
24019 TEST_REQUIRES_X86_XOP;
24020 for (uint32_t n = 5; n < 8; n++) {
24021 for (size_t k = 1; k <= 40; k += 9) {
24022 GemmMicrokernelTester()
24023 .mr(2)
24024 .nr(4)
24025 .kr(8)
24026 .sr(1)
24027 .m(2)
24028 .n(n)
24029 .k(k)
24030 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080024031 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024032 }
24033 }
24034 }
24035
24036 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_subtile) {
24037 TEST_REQUIRES_X86_XOP;
24038 for (uint32_t n = 5; n < 8; n++) {
24039 for (size_t k = 1; k <= 40; k += 9) {
24040 for (uint32_t m = 1; m <= 2; m++) {
24041 GemmMicrokernelTester()
24042 .mr(2)
24043 .nr(4)
24044 .kr(8)
24045 .sr(1)
24046 .m(m)
24047 .n(n)
24048 .k(k)
24049 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024050 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024051 }
24052 }
24053 }
24054 }
24055
24056 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4) {
24057 TEST_REQUIRES_X86_XOP;
24058 for (uint32_t n = 8; n <= 12; n += 4) {
24059 for (size_t k = 1; k <= 40; k += 9) {
24060 GemmMicrokernelTester()
24061 .mr(2)
24062 .nr(4)
24063 .kr(8)
24064 .sr(1)
24065 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024066 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070024067 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024068 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024069 }
24070 }
24071 }
24072
24073 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_strided_cn) {
24074 TEST_REQUIRES_X86_XOP;
24075 for (uint32_t n = 8; n <= 12; n += 4) {
24076 for (size_t k = 1; k <= 40; k += 9) {
24077 GemmMicrokernelTester()
24078 .mr(2)
24079 .nr(4)
24080 .kr(8)
24081 .sr(1)
24082 .m(2)
24083 .n(n)
24084 .k(k)
24085 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080024086 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024087 }
24088 }
24089 }
24090
24091 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_strided_a) {
24092 TEST_REQUIRES_X86_XOP;
24093 for (uint32_t n = 8; n <= 12; n += 4) {
24094 for (size_t k = 1; k <= 40; k += 9) {
24095 GemmMicrokernelTester()
24096 .mr(2)
24097 .nr(4)
24098 .kr(8)
24099 .sr(1)
24100 .m(2)
24101 .n(n)
24102 .k(k)
24103 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080024104 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024105 }
24106 }
24107 }
24108
24109 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_subtile) {
24110 TEST_REQUIRES_X86_XOP;
24111 for (uint32_t n = 8; n <= 12; n += 4) {
24112 for (size_t k = 1; k <= 40; k += 9) {
24113 for (uint32_t m = 1; m <= 2; m++) {
24114 GemmMicrokernelTester()
24115 .mr(2)
24116 .nr(4)
24117 .kr(8)
24118 .sr(1)
24119 .m(m)
24120 .n(n)
24121 .k(k)
24122 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024123 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024124 }
24125 }
24126 }
24127 }
24128
24129 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cm_subtile) {
24130 TEST_REQUIRES_X86_XOP;
24131 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024132 for (uint32_t n = 1; n <= 4; n++) {
24133 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024134 GemmMicrokernelTester()
24135 .mr(2)
24136 .nr(4)
24137 .kr(8)
24138 .sr(1)
24139 .m(m)
24140 .n(n)
24141 .k(k)
24142 .cm_stride(7)
24143 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024144 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024145 }
24146 }
24147 }
24148 }
24149
24150 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, qmin) {
24151 TEST_REQUIRES_X86_XOP;
24152 GemmMicrokernelTester()
24153 .mr(2)
24154 .nr(4)
24155 .kr(8)
24156 .sr(1)
24157 .m(2)
24158 .n(4)
24159 .k(8)
24160 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024161 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024162 }
24163
24164 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, qmax) {
24165 TEST_REQUIRES_X86_XOP;
24166 GemmMicrokernelTester()
24167 .mr(2)
24168 .nr(4)
24169 .kr(8)
24170 .sr(1)
24171 .m(2)
24172 .n(4)
24173 .k(8)
24174 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024175 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024176 }
24177
24178 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cm) {
24179 TEST_REQUIRES_X86_XOP;
24180 GemmMicrokernelTester()
24181 .mr(2)
24182 .nr(4)
24183 .kr(8)
24184 .sr(1)
24185 .m(2)
24186 .n(4)
24187 .k(8)
24188 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080024189 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024190 }
24191#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24192
24193
24194#if XNN_ARCH_X86 || XNN_ARCH_X86_64
24195 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8) {
24196 TEST_REQUIRES_X86_XOP;
24197 GemmMicrokernelTester()
24198 .mr(3)
24199 .nr(4)
24200 .kr(8)
24201 .sr(1)
24202 .m(3)
24203 .n(4)
24204 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080024205 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024206 }
24207
24208 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cn) {
24209 TEST_REQUIRES_X86_XOP;
24210 GemmMicrokernelTester()
24211 .mr(3)
24212 .nr(4)
24213 .kr(8)
24214 .sr(1)
24215 .m(3)
24216 .n(4)
24217 .k(8)
24218 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080024219 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024220 }
24221
24222 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_strided_a) {
24223 TEST_REQUIRES_X86_XOP;
24224 GemmMicrokernelTester()
24225 .mr(3)
24226 .nr(4)
24227 .kr(8)
24228 .sr(1)
24229 .m(3)
24230 .n(4)
24231 .k(8)
24232 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080024233 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024234 }
24235
24236 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile) {
24237 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080024238 for (uint32_t n = 1; n <= 4; n++) {
24239 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024240 GemmMicrokernelTester()
24241 .mr(3)
24242 .nr(4)
24243 .kr(8)
24244 .sr(1)
24245 .m(m)
24246 .n(n)
24247 .k(8)
24248 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024249 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024250 }
24251 }
24252 }
24253
24254 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile_m) {
24255 TEST_REQUIRES_X86_XOP;
24256 for (uint32_t m = 1; m <= 3; m++) {
24257 GemmMicrokernelTester()
24258 .mr(3)
24259 .nr(4)
24260 .kr(8)
24261 .sr(1)
24262 .m(m)
24263 .n(4)
24264 .k(8)
24265 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024266 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024267 }
24268 }
24269
24270 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile_n) {
24271 TEST_REQUIRES_X86_XOP;
24272 for (uint32_t n = 1; n <= 4; n++) {
24273 GemmMicrokernelTester()
24274 .mr(3)
24275 .nr(4)
24276 .kr(8)
24277 .sr(1)
24278 .m(3)
24279 .n(n)
24280 .k(8)
24281 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024282 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024283 }
24284 }
24285
24286 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8) {
24287 TEST_REQUIRES_X86_XOP;
24288 for (size_t k = 1; k < 8; k++) {
24289 GemmMicrokernelTester()
24290 .mr(3)
24291 .nr(4)
24292 .kr(8)
24293 .sr(1)
24294 .m(3)
24295 .n(4)
24296 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024297 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024298 }
24299 }
24300
24301 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8_strided_a) {
24302 TEST_REQUIRES_X86_XOP;
24303 for (size_t k = 1; k < 8; k++) {
24304 GemmMicrokernelTester()
24305 .mr(3)
24306 .nr(4)
24307 .kr(8)
24308 .sr(1)
24309 .m(3)
24310 .n(4)
24311 .k(k)
24312 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080024313 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024314 }
24315 }
24316
24317 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8_subtile) {
24318 TEST_REQUIRES_X86_XOP;
24319 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024320 for (uint32_t n = 1; n <= 4; n++) {
24321 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024322 GemmMicrokernelTester()
24323 .mr(3)
24324 .nr(4)
24325 .kr(8)
24326 .sr(1)
24327 .m(m)
24328 .n(n)
24329 .k(k)
24330 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024331 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024332 }
24333 }
24334 }
24335 }
24336
24337 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8) {
24338 TEST_REQUIRES_X86_XOP;
24339 for (size_t k = 9; k < 16; k++) {
24340 GemmMicrokernelTester()
24341 .mr(3)
24342 .nr(4)
24343 .kr(8)
24344 .sr(1)
24345 .m(3)
24346 .n(4)
24347 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024348 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024349 }
24350 }
24351
24352 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8_strided_a) {
24353 TEST_REQUIRES_X86_XOP;
24354 for (size_t k = 9; k < 16; k++) {
24355 GemmMicrokernelTester()
24356 .mr(3)
24357 .nr(4)
24358 .kr(8)
24359 .sr(1)
24360 .m(3)
24361 .n(4)
24362 .k(k)
24363 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080024364 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024365 }
24366 }
24367
24368 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8_subtile) {
24369 TEST_REQUIRES_X86_XOP;
24370 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024371 for (uint32_t n = 1; n <= 4; n++) {
24372 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024373 GemmMicrokernelTester()
24374 .mr(3)
24375 .nr(4)
24376 .kr(8)
24377 .sr(1)
24378 .m(m)
24379 .n(n)
24380 .k(k)
24381 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024382 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024383 }
24384 }
24385 }
24386 }
24387
24388 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8) {
24389 TEST_REQUIRES_X86_XOP;
24390 for (size_t k = 16; k <= 80; k += 8) {
24391 GemmMicrokernelTester()
24392 .mr(3)
24393 .nr(4)
24394 .kr(8)
24395 .sr(1)
24396 .m(3)
24397 .n(4)
24398 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024399 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024400 }
24401 }
24402
24403 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8_strided_a) {
24404 TEST_REQUIRES_X86_XOP;
24405 for (size_t k = 16; k <= 80; k += 8) {
24406 GemmMicrokernelTester()
24407 .mr(3)
24408 .nr(4)
24409 .kr(8)
24410 .sr(1)
24411 .m(3)
24412 .n(4)
24413 .k(k)
24414 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080024415 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024416 }
24417 }
24418
24419 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8_subtile) {
24420 TEST_REQUIRES_X86_XOP;
24421 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024422 for (uint32_t n = 1; n <= 4; n++) {
24423 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024424 GemmMicrokernelTester()
24425 .mr(3)
24426 .nr(4)
24427 .kr(8)
24428 .sr(1)
24429 .m(m)
24430 .n(n)
24431 .k(k)
24432 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024433 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024434 }
24435 }
24436 }
24437 }
24438
24439 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4) {
24440 TEST_REQUIRES_X86_XOP;
24441 for (uint32_t n = 5; n < 8; n++) {
24442 for (size_t k = 1; k <= 40; k += 9) {
24443 GemmMicrokernelTester()
24444 .mr(3)
24445 .nr(4)
24446 .kr(8)
24447 .sr(1)
24448 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024449 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070024450 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024451 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024452 }
24453 }
24454 }
24455
24456 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_strided_cn) {
24457 TEST_REQUIRES_X86_XOP;
24458 for (uint32_t n = 5; n < 8; n++) {
24459 for (size_t k = 1; k <= 40; k += 9) {
24460 GemmMicrokernelTester()
24461 .mr(3)
24462 .nr(4)
24463 .kr(8)
24464 .sr(1)
24465 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024466 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070024467 .k(k)
24468 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080024469 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024470 }
24471 }
24472 }
24473
24474 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_strided_a) {
24475 TEST_REQUIRES_X86_XOP;
24476 for (uint32_t n = 5; n < 8; n++) {
24477 for (size_t k = 1; k <= 40; k += 9) {
24478 GemmMicrokernelTester()
24479 .mr(3)
24480 .nr(4)
24481 .kr(8)
24482 .sr(1)
24483 .m(3)
24484 .n(n)
24485 .k(k)
24486 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080024487 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024488 }
24489 }
24490 }
24491
24492 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_subtile) {
24493 TEST_REQUIRES_X86_XOP;
24494 for (uint32_t n = 5; n < 8; n++) {
24495 for (size_t k = 1; k <= 40; k += 9) {
24496 for (uint32_t m = 1; m <= 3; m++) {
24497 GemmMicrokernelTester()
24498 .mr(3)
24499 .nr(4)
24500 .kr(8)
24501 .sr(1)
24502 .m(m)
24503 .n(n)
24504 .k(k)
24505 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024506 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024507 }
24508 }
24509 }
24510 }
24511
24512 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4) {
24513 TEST_REQUIRES_X86_XOP;
24514 for (uint32_t n = 8; n <= 12; n += 4) {
24515 for (size_t k = 1; k <= 40; k += 9) {
24516 GemmMicrokernelTester()
24517 .mr(3)
24518 .nr(4)
24519 .kr(8)
24520 .sr(1)
24521 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024522 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070024523 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024524 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024525 }
24526 }
24527 }
24528
24529 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_strided_cn) {
24530 TEST_REQUIRES_X86_XOP;
24531 for (uint32_t n = 8; n <= 12; n += 4) {
24532 for (size_t k = 1; k <= 40; k += 9) {
24533 GemmMicrokernelTester()
24534 .mr(3)
24535 .nr(4)
24536 .kr(8)
24537 .sr(1)
24538 .m(3)
24539 .n(n)
24540 .k(k)
24541 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080024542 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024543 }
24544 }
24545 }
24546
24547 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_strided_a) {
24548 TEST_REQUIRES_X86_XOP;
24549 for (uint32_t n = 8; n <= 12; n += 4) {
24550 for (size_t k = 1; k <= 40; k += 9) {
24551 GemmMicrokernelTester()
24552 .mr(3)
24553 .nr(4)
24554 .kr(8)
24555 .sr(1)
24556 .m(3)
24557 .n(n)
24558 .k(k)
24559 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080024560 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024561 }
24562 }
24563 }
24564
24565 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_subtile) {
24566 TEST_REQUIRES_X86_XOP;
24567 for (uint32_t n = 8; n <= 12; n += 4) {
24568 for (size_t k = 1; k <= 40; k += 9) {
24569 for (uint32_t m = 1; m <= 3; m++) {
24570 GemmMicrokernelTester()
24571 .mr(3)
24572 .nr(4)
24573 .kr(8)
24574 .sr(1)
24575 .m(m)
24576 .n(n)
24577 .k(k)
24578 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024579 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024580 }
24581 }
24582 }
24583 }
24584
24585 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cm_subtile) {
24586 TEST_REQUIRES_X86_XOP;
24587 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024588 for (uint32_t n = 1; n <= 4; n++) {
24589 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024590 GemmMicrokernelTester()
24591 .mr(3)
24592 .nr(4)
24593 .kr(8)
24594 .sr(1)
24595 .m(m)
24596 .n(n)
24597 .k(k)
24598 .cm_stride(7)
24599 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024600 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024601 }
24602 }
24603 }
24604 }
24605
24606 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, qmin) {
24607 TEST_REQUIRES_X86_XOP;
24608 GemmMicrokernelTester()
24609 .mr(3)
24610 .nr(4)
24611 .kr(8)
24612 .sr(1)
24613 .m(3)
24614 .n(4)
24615 .k(8)
24616 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024617 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024618 }
24619
24620 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, qmax) {
24621 TEST_REQUIRES_X86_XOP;
24622 GemmMicrokernelTester()
24623 .mr(3)
24624 .nr(4)
24625 .kr(8)
24626 .sr(1)
24627 .m(3)
24628 .n(4)
24629 .k(8)
24630 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024631 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024632 }
24633
24634 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cm) {
24635 TEST_REQUIRES_X86_XOP;
24636 GemmMicrokernelTester()
24637 .mr(3)
24638 .nr(4)
24639 .kr(8)
24640 .sr(1)
24641 .m(3)
24642 .n(4)
24643 .k(8)
24644 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080024645 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024646 }
24647#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24648
24649
24650#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanc46e6712021-06-01 19:00:16 -070024651 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8) {
24652 TEST_REQUIRES_X86_SSE2;
24653 GemmMicrokernelTester()
24654 .mr(3)
24655 .nr(4)
24656 .kr(8)
24657 .sr(1)
24658 .m(3)
24659 .n(4)
24660 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080024661 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024662 }
24663
24664 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, strided_cn) {
24665 TEST_REQUIRES_X86_SSE2;
24666 GemmMicrokernelTester()
24667 .mr(3)
24668 .nr(4)
24669 .kr(8)
24670 .sr(1)
24671 .m(3)
24672 .n(4)
24673 .k(8)
24674 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080024675 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024676 }
24677
24678 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8_strided_a) {
24679 TEST_REQUIRES_X86_SSE2;
24680 GemmMicrokernelTester()
24681 .mr(3)
24682 .nr(4)
24683 .kr(8)
24684 .sr(1)
24685 .m(3)
24686 .n(4)
24687 .k(8)
24688 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080024689 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024690 }
24691
24692 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8_subtile) {
24693 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080024694 for (uint32_t n = 1; n <= 4; n++) {
24695 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024696 GemmMicrokernelTester()
24697 .mr(3)
24698 .nr(4)
24699 .kr(8)
24700 .sr(1)
24701 .m(m)
24702 .n(n)
24703 .k(8)
24704 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024705 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024706 }
24707 }
24708 }
24709
24710 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8_subtile_m) {
24711 TEST_REQUIRES_X86_SSE2;
24712 for (uint32_t m = 1; m <= 3; m++) {
24713 GemmMicrokernelTester()
24714 .mr(3)
24715 .nr(4)
24716 .kr(8)
24717 .sr(1)
24718 .m(m)
24719 .n(4)
24720 .k(8)
24721 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024722 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024723 }
24724 }
24725
24726 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8_subtile_n) {
24727 TEST_REQUIRES_X86_SSE2;
24728 for (uint32_t n = 1; n <= 4; n++) {
24729 GemmMicrokernelTester()
24730 .mr(3)
24731 .nr(4)
24732 .kr(8)
24733 .sr(1)
24734 .m(3)
24735 .n(n)
24736 .k(8)
24737 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024738 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024739 }
24740 }
24741
24742 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_lt_8) {
24743 TEST_REQUIRES_X86_SSE2;
24744 for (size_t k = 1; k < 8; k++) {
24745 GemmMicrokernelTester()
24746 .mr(3)
24747 .nr(4)
24748 .kr(8)
24749 .sr(1)
24750 .m(3)
24751 .n(4)
24752 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024753 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024754 }
24755 }
24756
24757 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_lt_8_strided_a) {
24758 TEST_REQUIRES_X86_SSE2;
24759 for (size_t k = 1; k < 8; k++) {
24760 GemmMicrokernelTester()
24761 .mr(3)
24762 .nr(4)
24763 .kr(8)
24764 .sr(1)
24765 .m(3)
24766 .n(4)
24767 .k(k)
24768 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080024769 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024770 }
24771 }
24772
24773 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_lt_8_subtile) {
24774 TEST_REQUIRES_X86_SSE2;
24775 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024776 for (uint32_t n = 1; n <= 4; n++) {
24777 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024778 GemmMicrokernelTester()
24779 .mr(3)
24780 .nr(4)
24781 .kr(8)
24782 .sr(1)
24783 .m(m)
24784 .n(n)
24785 .k(k)
24786 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024787 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024788 }
24789 }
24790 }
24791 }
24792
24793 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_gt_8) {
24794 TEST_REQUIRES_X86_SSE2;
24795 for (size_t k = 9; k < 16; k++) {
24796 GemmMicrokernelTester()
24797 .mr(3)
24798 .nr(4)
24799 .kr(8)
24800 .sr(1)
24801 .m(3)
24802 .n(4)
24803 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024804 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024805 }
24806 }
24807
24808 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_gt_8_strided_a) {
24809 TEST_REQUIRES_X86_SSE2;
24810 for (size_t k = 9; k < 16; k++) {
24811 GemmMicrokernelTester()
24812 .mr(3)
24813 .nr(4)
24814 .kr(8)
24815 .sr(1)
24816 .m(3)
24817 .n(4)
24818 .k(k)
24819 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080024820 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024821 }
24822 }
24823
24824 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_gt_8_subtile) {
24825 TEST_REQUIRES_X86_SSE2;
24826 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024827 for (uint32_t n = 1; n <= 4; n++) {
24828 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024829 GemmMicrokernelTester()
24830 .mr(3)
24831 .nr(4)
24832 .kr(8)
24833 .sr(1)
24834 .m(m)
24835 .n(n)
24836 .k(k)
24837 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024838 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024839 }
24840 }
24841 }
24842 }
24843
24844 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_div_8) {
24845 TEST_REQUIRES_X86_SSE2;
24846 for (size_t k = 16; k <= 80; k += 8) {
24847 GemmMicrokernelTester()
24848 .mr(3)
24849 .nr(4)
24850 .kr(8)
24851 .sr(1)
24852 .m(3)
24853 .n(4)
24854 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024855 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024856 }
24857 }
24858
24859 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_div_8_strided_a) {
24860 TEST_REQUIRES_X86_SSE2;
24861 for (size_t k = 16; k <= 80; k += 8) {
24862 GemmMicrokernelTester()
24863 .mr(3)
24864 .nr(4)
24865 .kr(8)
24866 .sr(1)
24867 .m(3)
24868 .n(4)
24869 .k(k)
24870 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080024871 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024872 }
24873 }
24874
24875 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_div_8_subtile) {
24876 TEST_REQUIRES_X86_SSE2;
24877 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024878 for (uint32_t n = 1; n <= 4; n++) {
24879 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070024880 GemmMicrokernelTester()
24881 .mr(3)
24882 .nr(4)
24883 .kr(8)
24884 .sr(1)
24885 .m(m)
24886 .n(n)
24887 .k(k)
24888 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024889 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024890 }
24891 }
24892 }
24893 }
24894
24895 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4) {
24896 TEST_REQUIRES_X86_SSE2;
24897 for (uint32_t n = 5; n < 8; n++) {
24898 for (size_t k = 1; k <= 40; k += 9) {
24899 GemmMicrokernelTester()
24900 .mr(3)
24901 .nr(4)
24902 .kr(8)
24903 .sr(1)
24904 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024905 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070024906 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024907 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024908 }
24909 }
24910 }
24911
24912 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4_strided_cn) {
24913 TEST_REQUIRES_X86_SSE2;
24914 for (uint32_t n = 5; n < 8; n++) {
24915 for (size_t k = 1; k <= 40; k += 9) {
24916 GemmMicrokernelTester()
24917 .mr(3)
24918 .nr(4)
24919 .kr(8)
24920 .sr(1)
24921 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024922 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070024923 .k(k)
24924 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080024925 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024926 }
24927 }
24928 }
24929
24930 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4_strided_a) {
24931 TEST_REQUIRES_X86_SSE2;
24932 for (uint32_t n = 5; n < 8; n++) {
24933 for (size_t k = 1; k <= 40; k += 9) {
24934 GemmMicrokernelTester()
24935 .mr(3)
24936 .nr(4)
24937 .kr(8)
24938 .sr(1)
24939 .m(3)
24940 .n(n)
24941 .k(k)
24942 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080024943 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024944 }
24945 }
24946 }
24947
24948 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4_subtile) {
24949 TEST_REQUIRES_X86_SSE2;
24950 for (uint32_t n = 5; n < 8; n++) {
24951 for (size_t k = 1; k <= 40; k += 9) {
24952 for (uint32_t m = 1; m <= 3; m++) {
24953 GemmMicrokernelTester()
24954 .mr(3)
24955 .nr(4)
24956 .kr(8)
24957 .sr(1)
24958 .m(m)
24959 .n(n)
24960 .k(k)
24961 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024962 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024963 }
24964 }
24965 }
24966 }
24967
24968 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4) {
24969 TEST_REQUIRES_X86_SSE2;
24970 for (uint32_t n = 8; n <= 12; n += 4) {
24971 for (size_t k = 1; k <= 40; k += 9) {
24972 GemmMicrokernelTester()
24973 .mr(3)
24974 .nr(4)
24975 .kr(8)
24976 .sr(1)
24977 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024978 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070024979 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024980 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024981 }
24982 }
24983 }
24984
24985 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4_strided_cn) {
24986 TEST_REQUIRES_X86_SSE2;
24987 for (uint32_t n = 8; n <= 12; n += 4) {
24988 for (size_t k = 1; k <= 40; k += 9) {
24989 GemmMicrokernelTester()
24990 .mr(3)
24991 .nr(4)
24992 .kr(8)
24993 .sr(1)
24994 .m(3)
24995 .n(n)
24996 .k(k)
24997 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080024998 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070024999 }
25000 }
25001 }
25002
25003 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4_strided_a) {
25004 TEST_REQUIRES_X86_SSE2;
25005 for (uint32_t n = 8; n <= 12; n += 4) {
25006 for (size_t k = 1; k <= 40; k += 9) {
25007 GemmMicrokernelTester()
25008 .mr(3)
25009 .nr(4)
25010 .kr(8)
25011 .sr(1)
25012 .m(3)
25013 .n(n)
25014 .k(k)
25015 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080025016 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025017 }
25018 }
25019 }
25020
25021 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4_subtile) {
25022 TEST_REQUIRES_X86_SSE2;
25023 for (uint32_t n = 8; n <= 12; n += 4) {
25024 for (size_t k = 1; k <= 40; k += 9) {
25025 for (uint32_t m = 1; m <= 3; m++) {
25026 GemmMicrokernelTester()
25027 .mr(3)
25028 .nr(4)
25029 .kr(8)
25030 .sr(1)
25031 .m(m)
25032 .n(n)
25033 .k(k)
25034 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025035 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025036 }
25037 }
25038 }
25039 }
25040
25041 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, strided_cm_subtile) {
25042 TEST_REQUIRES_X86_SSE2;
25043 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025044 for (uint32_t n = 1; n <= 4; n++) {
25045 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025046 GemmMicrokernelTester()
25047 .mr(3)
25048 .nr(4)
25049 .kr(8)
25050 .sr(1)
25051 .m(m)
25052 .n(n)
25053 .k(k)
25054 .cm_stride(7)
25055 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025056 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025057 }
25058 }
25059 }
25060 }
25061
25062 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, qmin) {
25063 TEST_REQUIRES_X86_SSE2;
25064 GemmMicrokernelTester()
25065 .mr(3)
25066 .nr(4)
25067 .kr(8)
25068 .sr(1)
25069 .m(3)
25070 .n(4)
25071 .k(8)
25072 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025073 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025074 }
25075
25076 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, qmax) {
25077 TEST_REQUIRES_X86_SSE2;
25078 GemmMicrokernelTester()
25079 .mr(3)
25080 .nr(4)
25081 .kr(8)
25082 .sr(1)
25083 .m(3)
25084 .n(4)
25085 .k(8)
25086 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025087 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025088 }
25089
25090 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__SSE2_LD128, strided_cm) {
25091 TEST_REQUIRES_X86_SSE2;
25092 GemmMicrokernelTester()
25093 .mr(3)
25094 .nr(4)
25095 .kr(8)
25096 .sr(1)
25097 .m(3)
25098 .n(4)
25099 .k(8)
25100 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025101 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025102 }
25103#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25104
25105
25106#if XNN_ARCH_X86 || XNN_ARCH_X86_64
25107 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, k_eq_8) {
25108 TEST_REQUIRES_X86_SSSE3;
25109 GemmMicrokernelTester()
25110 .mr(1)
25111 .nr(4)
25112 .kr(8)
25113 .sr(1)
25114 .m(1)
25115 .n(4)
25116 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080025117 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025118 }
25119
25120 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, strided_cn) {
25121 TEST_REQUIRES_X86_SSSE3;
25122 GemmMicrokernelTester()
25123 .mr(1)
25124 .nr(4)
25125 .kr(8)
25126 .sr(1)
25127 .m(1)
25128 .n(4)
25129 .k(8)
25130 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025131 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025132 }
25133
25134 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, k_eq_8_strided_a) {
25135 TEST_REQUIRES_X86_SSSE3;
25136 GemmMicrokernelTester()
25137 .mr(1)
25138 .nr(4)
25139 .kr(8)
25140 .sr(1)
25141 .m(1)
25142 .n(4)
25143 .k(8)
25144 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080025145 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025146 }
25147
25148 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, k_eq_8_subtile) {
25149 TEST_REQUIRES_X86_SSSE3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080025150 for (uint32_t n = 1; n <= 4; n++) {
25151 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025152 GemmMicrokernelTester()
25153 .mr(1)
25154 .nr(4)
25155 .kr(8)
25156 .sr(1)
25157 .m(m)
25158 .n(n)
25159 .k(8)
25160 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025161 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025162 }
25163 }
25164 }
25165
25166 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, k_eq_8_subtile_m) {
25167 TEST_REQUIRES_X86_SSSE3;
25168 for (uint32_t m = 1; m <= 1; m++) {
25169 GemmMicrokernelTester()
25170 .mr(1)
25171 .nr(4)
25172 .kr(8)
25173 .sr(1)
25174 .m(m)
25175 .n(4)
25176 .k(8)
25177 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025178 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025179 }
25180 }
25181
25182 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, k_eq_8_subtile_n) {
25183 TEST_REQUIRES_X86_SSSE3;
25184 for (uint32_t n = 1; n <= 4; n++) {
25185 GemmMicrokernelTester()
25186 .mr(1)
25187 .nr(4)
25188 .kr(8)
25189 .sr(1)
25190 .m(1)
25191 .n(n)
25192 .k(8)
25193 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025194 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025195 }
25196 }
25197
25198 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, k_lt_8) {
25199 TEST_REQUIRES_X86_SSSE3;
25200 for (size_t k = 1; k < 8; k++) {
25201 GemmMicrokernelTester()
25202 .mr(1)
25203 .nr(4)
25204 .kr(8)
25205 .sr(1)
25206 .m(1)
25207 .n(4)
25208 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025209 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025210 }
25211 }
25212
25213 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, k_lt_8_strided_a) {
25214 TEST_REQUIRES_X86_SSSE3;
25215 for (size_t k = 1; k < 8; k++) {
25216 GemmMicrokernelTester()
25217 .mr(1)
25218 .nr(4)
25219 .kr(8)
25220 .sr(1)
25221 .m(1)
25222 .n(4)
25223 .k(k)
25224 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080025225 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025226 }
25227 }
25228
25229 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, k_lt_8_subtile) {
25230 TEST_REQUIRES_X86_SSSE3;
25231 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025232 for (uint32_t n = 1; n <= 4; n++) {
25233 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025234 GemmMicrokernelTester()
25235 .mr(1)
25236 .nr(4)
25237 .kr(8)
25238 .sr(1)
25239 .m(m)
25240 .n(n)
25241 .k(k)
25242 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025243 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025244 }
25245 }
25246 }
25247 }
25248
25249 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, k_gt_8) {
25250 TEST_REQUIRES_X86_SSSE3;
25251 for (size_t k = 9; k < 16; k++) {
25252 GemmMicrokernelTester()
25253 .mr(1)
25254 .nr(4)
25255 .kr(8)
25256 .sr(1)
25257 .m(1)
25258 .n(4)
25259 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025260 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025261 }
25262 }
25263
25264 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, k_gt_8_strided_a) {
25265 TEST_REQUIRES_X86_SSSE3;
25266 for (size_t k = 9; k < 16; k++) {
25267 GemmMicrokernelTester()
25268 .mr(1)
25269 .nr(4)
25270 .kr(8)
25271 .sr(1)
25272 .m(1)
25273 .n(4)
25274 .k(k)
25275 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080025276 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025277 }
25278 }
25279
25280 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, k_gt_8_subtile) {
25281 TEST_REQUIRES_X86_SSSE3;
25282 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025283 for (uint32_t n = 1; n <= 4; n++) {
25284 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025285 GemmMicrokernelTester()
25286 .mr(1)
25287 .nr(4)
25288 .kr(8)
25289 .sr(1)
25290 .m(m)
25291 .n(n)
25292 .k(k)
25293 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025294 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025295 }
25296 }
25297 }
25298 }
25299
25300 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, k_div_8) {
25301 TEST_REQUIRES_X86_SSSE3;
25302 for (size_t k = 16; k <= 80; k += 8) {
25303 GemmMicrokernelTester()
25304 .mr(1)
25305 .nr(4)
25306 .kr(8)
25307 .sr(1)
25308 .m(1)
25309 .n(4)
25310 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025311 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025312 }
25313 }
25314
25315 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, k_div_8_strided_a) {
25316 TEST_REQUIRES_X86_SSSE3;
25317 for (size_t k = 16; k <= 80; k += 8) {
25318 GemmMicrokernelTester()
25319 .mr(1)
25320 .nr(4)
25321 .kr(8)
25322 .sr(1)
25323 .m(1)
25324 .n(4)
25325 .k(k)
25326 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080025327 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025328 }
25329 }
25330
25331 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, k_div_8_subtile) {
25332 TEST_REQUIRES_X86_SSSE3;
25333 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025334 for (uint32_t n = 1; n <= 4; n++) {
25335 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025336 GemmMicrokernelTester()
25337 .mr(1)
25338 .nr(4)
25339 .kr(8)
25340 .sr(1)
25341 .m(m)
25342 .n(n)
25343 .k(k)
25344 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025345 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025346 }
25347 }
25348 }
25349 }
25350
25351 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, n_gt_4) {
25352 TEST_REQUIRES_X86_SSSE3;
25353 for (uint32_t n = 5; n < 8; n++) {
25354 for (size_t k = 1; k <= 40; k += 9) {
25355 GemmMicrokernelTester()
25356 .mr(1)
25357 .nr(4)
25358 .kr(8)
25359 .sr(1)
25360 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025361 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070025362 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025363 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025364 }
25365 }
25366 }
25367
25368 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, n_gt_4_strided_cn) {
25369 TEST_REQUIRES_X86_SSSE3;
25370 for (uint32_t n = 5; n < 8; n++) {
25371 for (size_t k = 1; k <= 40; k += 9) {
25372 GemmMicrokernelTester()
25373 .mr(1)
25374 .nr(4)
25375 .kr(8)
25376 .sr(1)
25377 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025378 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070025379 .k(k)
25380 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025381 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025382 }
25383 }
25384 }
25385
25386 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, n_gt_4_strided_a) {
25387 TEST_REQUIRES_X86_SSSE3;
25388 for (uint32_t n = 5; n < 8; n++) {
25389 for (size_t k = 1; k <= 40; k += 9) {
25390 GemmMicrokernelTester()
25391 .mr(1)
25392 .nr(4)
25393 .kr(8)
25394 .sr(1)
25395 .m(1)
25396 .n(n)
25397 .k(k)
25398 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080025399 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025400 }
25401 }
25402 }
25403
25404 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, n_gt_4_subtile) {
25405 TEST_REQUIRES_X86_SSSE3;
25406 for (uint32_t n = 5; n < 8; n++) {
25407 for (size_t k = 1; k <= 40; k += 9) {
25408 for (uint32_t m = 1; m <= 1; m++) {
25409 GemmMicrokernelTester()
25410 .mr(1)
25411 .nr(4)
25412 .kr(8)
25413 .sr(1)
25414 .m(m)
25415 .n(n)
25416 .k(k)
25417 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025418 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025419 }
25420 }
25421 }
25422 }
25423
25424 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, n_div_4) {
25425 TEST_REQUIRES_X86_SSSE3;
25426 for (uint32_t n = 8; n <= 12; n += 4) {
25427 for (size_t k = 1; k <= 40; k += 9) {
25428 GemmMicrokernelTester()
25429 .mr(1)
25430 .nr(4)
25431 .kr(8)
25432 .sr(1)
25433 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025434 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070025435 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025436 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025437 }
25438 }
25439 }
25440
25441 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, n_div_4_strided_cn) {
25442 TEST_REQUIRES_X86_SSSE3;
25443 for (uint32_t n = 8; n <= 12; n += 4) {
25444 for (size_t k = 1; k <= 40; k += 9) {
25445 GemmMicrokernelTester()
25446 .mr(1)
25447 .nr(4)
25448 .kr(8)
25449 .sr(1)
25450 .m(1)
25451 .n(n)
25452 .k(k)
25453 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025454 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025455 }
25456 }
25457 }
25458
25459 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, n_div_4_strided_a) {
25460 TEST_REQUIRES_X86_SSSE3;
25461 for (uint32_t n = 8; n <= 12; n += 4) {
25462 for (size_t k = 1; k <= 40; k += 9) {
25463 GemmMicrokernelTester()
25464 .mr(1)
25465 .nr(4)
25466 .kr(8)
25467 .sr(1)
25468 .m(1)
25469 .n(n)
25470 .k(k)
25471 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080025472 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025473 }
25474 }
25475 }
25476
25477 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, n_div_4_subtile) {
25478 TEST_REQUIRES_X86_SSSE3;
25479 for (uint32_t n = 8; n <= 12; n += 4) {
25480 for (size_t k = 1; k <= 40; k += 9) {
25481 for (uint32_t m = 1; m <= 1; m++) {
25482 GemmMicrokernelTester()
25483 .mr(1)
25484 .nr(4)
25485 .kr(8)
25486 .sr(1)
25487 .m(m)
25488 .n(n)
25489 .k(k)
25490 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025491 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025492 }
25493 }
25494 }
25495 }
25496
25497 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, strided_cm_subtile) {
25498 TEST_REQUIRES_X86_SSSE3;
25499 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025500 for (uint32_t n = 1; n <= 4; n++) {
25501 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025502 GemmMicrokernelTester()
25503 .mr(1)
25504 .nr(4)
25505 .kr(8)
25506 .sr(1)
25507 .m(m)
25508 .n(n)
25509 .k(k)
25510 .cm_stride(7)
25511 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025512 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025513 }
25514 }
25515 }
25516 }
25517
25518 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, qmin) {
25519 TEST_REQUIRES_X86_SSSE3;
25520 GemmMicrokernelTester()
25521 .mr(1)
25522 .nr(4)
25523 .kr(8)
25524 .sr(1)
25525 .m(1)
25526 .n(4)
25527 .k(8)
25528 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025529 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025530 }
25531
25532 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, qmax) {
25533 TEST_REQUIRES_X86_SSSE3;
25534 GemmMicrokernelTester()
25535 .mr(1)
25536 .nr(4)
25537 .kr(8)
25538 .sr(1)
25539 .m(1)
25540 .n(4)
25541 .k(8)
25542 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025543 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025544 }
25545
25546 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSSE3_LD128, strided_cm) {
25547 TEST_REQUIRES_X86_SSSE3;
25548 GemmMicrokernelTester()
25549 .mr(1)
25550 .nr(4)
25551 .kr(8)
25552 .sr(1)
25553 .m(1)
25554 .n(4)
25555 .k(8)
25556 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025557 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025558 }
25559#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25560
25561
25562#if XNN_ARCH_X86 || XNN_ARCH_X86_64
25563 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, k_eq_8) {
25564 TEST_REQUIRES_X86_SSSE3;
25565 GemmMicrokernelTester()
25566 .mr(2)
25567 .nr(4)
25568 .kr(8)
25569 .sr(1)
25570 .m(2)
25571 .n(4)
25572 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080025573 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025574 }
25575
25576 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, strided_cn) {
25577 TEST_REQUIRES_X86_SSSE3;
25578 GemmMicrokernelTester()
25579 .mr(2)
25580 .nr(4)
25581 .kr(8)
25582 .sr(1)
25583 .m(2)
25584 .n(4)
25585 .k(8)
25586 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025587 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025588 }
25589
25590 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, k_eq_8_strided_a) {
25591 TEST_REQUIRES_X86_SSSE3;
25592 GemmMicrokernelTester()
25593 .mr(2)
25594 .nr(4)
25595 .kr(8)
25596 .sr(1)
25597 .m(2)
25598 .n(4)
25599 .k(8)
25600 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080025601 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025602 }
25603
25604 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, k_eq_8_subtile) {
25605 TEST_REQUIRES_X86_SSSE3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080025606 for (uint32_t n = 1; n <= 4; n++) {
25607 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025608 GemmMicrokernelTester()
25609 .mr(2)
25610 .nr(4)
25611 .kr(8)
25612 .sr(1)
25613 .m(m)
25614 .n(n)
25615 .k(8)
25616 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025617 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025618 }
25619 }
25620 }
25621
25622 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, k_eq_8_subtile_m) {
25623 TEST_REQUIRES_X86_SSSE3;
25624 for (uint32_t m = 1; m <= 2; m++) {
25625 GemmMicrokernelTester()
25626 .mr(2)
25627 .nr(4)
25628 .kr(8)
25629 .sr(1)
25630 .m(m)
25631 .n(4)
25632 .k(8)
25633 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025634 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025635 }
25636 }
25637
25638 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, k_eq_8_subtile_n) {
25639 TEST_REQUIRES_X86_SSSE3;
25640 for (uint32_t n = 1; n <= 4; n++) {
25641 GemmMicrokernelTester()
25642 .mr(2)
25643 .nr(4)
25644 .kr(8)
25645 .sr(1)
25646 .m(2)
25647 .n(n)
25648 .k(8)
25649 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025650 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025651 }
25652 }
25653
25654 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, k_lt_8) {
25655 TEST_REQUIRES_X86_SSSE3;
25656 for (size_t k = 1; k < 8; k++) {
25657 GemmMicrokernelTester()
25658 .mr(2)
25659 .nr(4)
25660 .kr(8)
25661 .sr(1)
25662 .m(2)
25663 .n(4)
25664 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025665 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025666 }
25667 }
25668
25669 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, k_lt_8_strided_a) {
25670 TEST_REQUIRES_X86_SSSE3;
25671 for (size_t k = 1; k < 8; k++) {
25672 GemmMicrokernelTester()
25673 .mr(2)
25674 .nr(4)
25675 .kr(8)
25676 .sr(1)
25677 .m(2)
25678 .n(4)
25679 .k(k)
25680 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080025681 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025682 }
25683 }
25684
25685 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, k_lt_8_subtile) {
25686 TEST_REQUIRES_X86_SSSE3;
25687 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025688 for (uint32_t n = 1; n <= 4; n++) {
25689 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025690 GemmMicrokernelTester()
25691 .mr(2)
25692 .nr(4)
25693 .kr(8)
25694 .sr(1)
25695 .m(m)
25696 .n(n)
25697 .k(k)
25698 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025699 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025700 }
25701 }
25702 }
25703 }
25704
25705 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, k_gt_8) {
25706 TEST_REQUIRES_X86_SSSE3;
25707 for (size_t k = 9; k < 16; k++) {
25708 GemmMicrokernelTester()
25709 .mr(2)
25710 .nr(4)
25711 .kr(8)
25712 .sr(1)
25713 .m(2)
25714 .n(4)
25715 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025716 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025717 }
25718 }
25719
25720 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, k_gt_8_strided_a) {
25721 TEST_REQUIRES_X86_SSSE3;
25722 for (size_t k = 9; k < 16; k++) {
25723 GemmMicrokernelTester()
25724 .mr(2)
25725 .nr(4)
25726 .kr(8)
25727 .sr(1)
25728 .m(2)
25729 .n(4)
25730 .k(k)
25731 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080025732 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025733 }
25734 }
25735
25736 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, k_gt_8_subtile) {
25737 TEST_REQUIRES_X86_SSSE3;
25738 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025739 for (uint32_t n = 1; n <= 4; n++) {
25740 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025741 GemmMicrokernelTester()
25742 .mr(2)
25743 .nr(4)
25744 .kr(8)
25745 .sr(1)
25746 .m(m)
25747 .n(n)
25748 .k(k)
25749 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025750 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025751 }
25752 }
25753 }
25754 }
25755
25756 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, k_div_8) {
25757 TEST_REQUIRES_X86_SSSE3;
25758 for (size_t k = 16; k <= 80; k += 8) {
25759 GemmMicrokernelTester()
25760 .mr(2)
25761 .nr(4)
25762 .kr(8)
25763 .sr(1)
25764 .m(2)
25765 .n(4)
25766 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025767 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025768 }
25769 }
25770
25771 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, k_div_8_strided_a) {
25772 TEST_REQUIRES_X86_SSSE3;
25773 for (size_t k = 16; k <= 80; k += 8) {
25774 GemmMicrokernelTester()
25775 .mr(2)
25776 .nr(4)
25777 .kr(8)
25778 .sr(1)
25779 .m(2)
25780 .n(4)
25781 .k(k)
25782 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080025783 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025784 }
25785 }
25786
25787 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, k_div_8_subtile) {
25788 TEST_REQUIRES_X86_SSSE3;
25789 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025790 for (uint32_t n = 1; n <= 4; n++) {
25791 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025792 GemmMicrokernelTester()
25793 .mr(2)
25794 .nr(4)
25795 .kr(8)
25796 .sr(1)
25797 .m(m)
25798 .n(n)
25799 .k(k)
25800 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025801 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025802 }
25803 }
25804 }
25805 }
25806
25807 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, n_gt_4) {
25808 TEST_REQUIRES_X86_SSSE3;
25809 for (uint32_t n = 5; n < 8; n++) {
25810 for (size_t k = 1; k <= 40; k += 9) {
25811 GemmMicrokernelTester()
25812 .mr(2)
25813 .nr(4)
25814 .kr(8)
25815 .sr(1)
25816 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025817 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070025818 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025819 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025820 }
25821 }
25822 }
25823
25824 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, n_gt_4_strided_cn) {
25825 TEST_REQUIRES_X86_SSSE3;
25826 for (uint32_t n = 5; n < 8; n++) {
25827 for (size_t k = 1; k <= 40; k += 9) {
25828 GemmMicrokernelTester()
25829 .mr(2)
25830 .nr(4)
25831 .kr(8)
25832 .sr(1)
25833 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025834 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070025835 .k(k)
25836 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025837 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025838 }
25839 }
25840 }
25841
25842 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, n_gt_4_strided_a) {
25843 TEST_REQUIRES_X86_SSSE3;
25844 for (uint32_t n = 5; n < 8; n++) {
25845 for (size_t k = 1; k <= 40; k += 9) {
25846 GemmMicrokernelTester()
25847 .mr(2)
25848 .nr(4)
25849 .kr(8)
25850 .sr(1)
25851 .m(2)
25852 .n(n)
25853 .k(k)
25854 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080025855 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025856 }
25857 }
25858 }
25859
25860 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, n_gt_4_subtile) {
25861 TEST_REQUIRES_X86_SSSE3;
25862 for (uint32_t n = 5; n < 8; n++) {
25863 for (size_t k = 1; k <= 40; k += 9) {
25864 for (uint32_t m = 1; m <= 2; m++) {
25865 GemmMicrokernelTester()
25866 .mr(2)
25867 .nr(4)
25868 .kr(8)
25869 .sr(1)
25870 .m(m)
25871 .n(n)
25872 .k(k)
25873 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025874 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025875 }
25876 }
25877 }
25878 }
25879
25880 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, n_div_4) {
25881 TEST_REQUIRES_X86_SSSE3;
25882 for (uint32_t n = 8; n <= 12; n += 4) {
25883 for (size_t k = 1; k <= 40; k += 9) {
25884 GemmMicrokernelTester()
25885 .mr(2)
25886 .nr(4)
25887 .kr(8)
25888 .sr(1)
25889 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025890 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070025891 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025892 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025893 }
25894 }
25895 }
25896
25897 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, n_div_4_strided_cn) {
25898 TEST_REQUIRES_X86_SSSE3;
25899 for (uint32_t n = 8; n <= 12; n += 4) {
25900 for (size_t k = 1; k <= 40; k += 9) {
25901 GemmMicrokernelTester()
25902 .mr(2)
25903 .nr(4)
25904 .kr(8)
25905 .sr(1)
25906 .m(2)
25907 .n(n)
25908 .k(k)
25909 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025910 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025911 }
25912 }
25913 }
25914
25915 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, n_div_4_strided_a) {
25916 TEST_REQUIRES_X86_SSSE3;
25917 for (uint32_t n = 8; n <= 12; n += 4) {
25918 for (size_t k = 1; k <= 40; k += 9) {
25919 GemmMicrokernelTester()
25920 .mr(2)
25921 .nr(4)
25922 .kr(8)
25923 .sr(1)
25924 .m(2)
25925 .n(n)
25926 .k(k)
25927 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080025928 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025929 }
25930 }
25931 }
25932
25933 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, n_div_4_subtile) {
25934 TEST_REQUIRES_X86_SSSE3;
25935 for (uint32_t n = 8; n <= 12; n += 4) {
25936 for (size_t k = 1; k <= 40; k += 9) {
25937 for (uint32_t m = 1; m <= 2; m++) {
25938 GemmMicrokernelTester()
25939 .mr(2)
25940 .nr(4)
25941 .kr(8)
25942 .sr(1)
25943 .m(m)
25944 .n(n)
25945 .k(k)
25946 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025947 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025948 }
25949 }
25950 }
25951 }
25952
25953 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, strided_cm_subtile) {
25954 TEST_REQUIRES_X86_SSSE3;
25955 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025956 for (uint32_t n = 1; n <= 4; n++) {
25957 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070025958 GemmMicrokernelTester()
25959 .mr(2)
25960 .nr(4)
25961 .kr(8)
25962 .sr(1)
25963 .m(m)
25964 .n(n)
25965 .k(k)
25966 .cm_stride(7)
25967 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025968 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025969 }
25970 }
25971 }
25972 }
25973
25974 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, qmin) {
25975 TEST_REQUIRES_X86_SSSE3;
25976 GemmMicrokernelTester()
25977 .mr(2)
25978 .nr(4)
25979 .kr(8)
25980 .sr(1)
25981 .m(2)
25982 .n(4)
25983 .k(8)
25984 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025985 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070025986 }
25987
25988 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, qmax) {
25989 TEST_REQUIRES_X86_SSSE3;
25990 GemmMicrokernelTester()
25991 .mr(2)
25992 .nr(4)
25993 .kr(8)
25994 .sr(1)
25995 .m(2)
25996 .n(4)
25997 .k(8)
25998 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025999 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026000 }
26001
26002 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSSE3_LD128, strided_cm) {
26003 TEST_REQUIRES_X86_SSSE3;
26004 GemmMicrokernelTester()
26005 .mr(2)
26006 .nr(4)
26007 .kr(8)
26008 .sr(1)
26009 .m(2)
26010 .n(4)
26011 .k(8)
26012 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026013 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026014 }
26015#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26016
26017
26018#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanc46e6712021-06-01 19:00:16 -070026019 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8) {
26020 TEST_REQUIRES_X86_SSE41;
26021 GemmMicrokernelTester()
26022 .mr(1)
26023 .nr(4)
26024 .kr(8)
26025 .sr(1)
26026 .m(1)
26027 .n(4)
26028 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080026029 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026030 }
26031
26032 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cn) {
26033 TEST_REQUIRES_X86_SSE41;
26034 GemmMicrokernelTester()
26035 .mr(1)
26036 .nr(4)
26037 .kr(8)
26038 .sr(1)
26039 .m(1)
26040 .n(4)
26041 .k(8)
26042 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026043 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026044 }
26045
26046 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_strided_a) {
26047 TEST_REQUIRES_X86_SSE41;
26048 GemmMicrokernelTester()
26049 .mr(1)
26050 .nr(4)
26051 .kr(8)
26052 .sr(1)
26053 .m(1)
26054 .n(4)
26055 .k(8)
26056 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080026057 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026058 }
26059
26060 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile) {
26061 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080026062 for (uint32_t n = 1; n <= 4; n++) {
26063 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026064 GemmMicrokernelTester()
26065 .mr(1)
26066 .nr(4)
26067 .kr(8)
26068 .sr(1)
26069 .m(m)
26070 .n(n)
26071 .k(8)
26072 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026073 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026074 }
26075 }
26076 }
26077
26078 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile_m) {
26079 TEST_REQUIRES_X86_SSE41;
26080 for (uint32_t m = 1; m <= 1; m++) {
26081 GemmMicrokernelTester()
26082 .mr(1)
26083 .nr(4)
26084 .kr(8)
26085 .sr(1)
26086 .m(m)
26087 .n(4)
26088 .k(8)
26089 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026090 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026091 }
26092 }
26093
26094 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile_n) {
26095 TEST_REQUIRES_X86_SSE41;
26096 for (uint32_t n = 1; n <= 4; n++) {
26097 GemmMicrokernelTester()
26098 .mr(1)
26099 .nr(4)
26100 .kr(8)
26101 .sr(1)
26102 .m(1)
26103 .n(n)
26104 .k(8)
26105 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026106 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026107 }
26108 }
26109
26110 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_lt_8) {
26111 TEST_REQUIRES_X86_SSE41;
26112 for (size_t k = 1; k < 8; k++) {
26113 GemmMicrokernelTester()
26114 .mr(1)
26115 .nr(4)
26116 .kr(8)
26117 .sr(1)
26118 .m(1)
26119 .n(4)
26120 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026121 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026122 }
26123 }
26124
26125 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_lt_8_strided_a) {
26126 TEST_REQUIRES_X86_SSE41;
26127 for (size_t k = 1; k < 8; k++) {
26128 GemmMicrokernelTester()
26129 .mr(1)
26130 .nr(4)
26131 .kr(8)
26132 .sr(1)
26133 .m(1)
26134 .n(4)
26135 .k(k)
26136 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080026137 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026138 }
26139 }
26140
26141 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_lt_8_subtile) {
26142 TEST_REQUIRES_X86_SSE41;
26143 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026144 for (uint32_t n = 1; n <= 4; n++) {
26145 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026146 GemmMicrokernelTester()
26147 .mr(1)
26148 .nr(4)
26149 .kr(8)
26150 .sr(1)
26151 .m(m)
26152 .n(n)
26153 .k(k)
26154 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026155 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026156 }
26157 }
26158 }
26159 }
26160
26161 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_gt_8) {
26162 TEST_REQUIRES_X86_SSE41;
26163 for (size_t k = 9; k < 16; k++) {
26164 GemmMicrokernelTester()
26165 .mr(1)
26166 .nr(4)
26167 .kr(8)
26168 .sr(1)
26169 .m(1)
26170 .n(4)
26171 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026172 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026173 }
26174 }
26175
26176 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_gt_8_strided_a) {
26177 TEST_REQUIRES_X86_SSE41;
26178 for (size_t k = 9; k < 16; k++) {
26179 GemmMicrokernelTester()
26180 .mr(1)
26181 .nr(4)
26182 .kr(8)
26183 .sr(1)
26184 .m(1)
26185 .n(4)
26186 .k(k)
26187 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080026188 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026189 }
26190 }
26191
26192 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_gt_8_subtile) {
26193 TEST_REQUIRES_X86_SSE41;
26194 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026195 for (uint32_t n = 1; n <= 4; n++) {
26196 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026197 GemmMicrokernelTester()
26198 .mr(1)
26199 .nr(4)
26200 .kr(8)
26201 .sr(1)
26202 .m(m)
26203 .n(n)
26204 .k(k)
26205 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026206 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026207 }
26208 }
26209 }
26210 }
26211
26212 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_div_8) {
26213 TEST_REQUIRES_X86_SSE41;
26214 for (size_t k = 16; k <= 80; k += 8) {
26215 GemmMicrokernelTester()
26216 .mr(1)
26217 .nr(4)
26218 .kr(8)
26219 .sr(1)
26220 .m(1)
26221 .n(4)
26222 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026223 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026224 }
26225 }
26226
26227 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_div_8_strided_a) {
26228 TEST_REQUIRES_X86_SSE41;
26229 for (size_t k = 16; k <= 80; k += 8) {
26230 GemmMicrokernelTester()
26231 .mr(1)
26232 .nr(4)
26233 .kr(8)
26234 .sr(1)
26235 .m(1)
26236 .n(4)
26237 .k(k)
26238 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080026239 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026240 }
26241 }
26242
26243 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_div_8_subtile) {
26244 TEST_REQUIRES_X86_SSE41;
26245 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026246 for (uint32_t n = 1; n <= 4; n++) {
26247 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026248 GemmMicrokernelTester()
26249 .mr(1)
26250 .nr(4)
26251 .kr(8)
26252 .sr(1)
26253 .m(m)
26254 .n(n)
26255 .k(k)
26256 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026257 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026258 }
26259 }
26260 }
26261 }
26262
26263 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4) {
26264 TEST_REQUIRES_X86_SSE41;
26265 for (uint32_t n = 5; n < 8; n++) {
26266 for (size_t k = 1; k <= 40; k += 9) {
26267 GemmMicrokernelTester()
26268 .mr(1)
26269 .nr(4)
26270 .kr(8)
26271 .sr(1)
26272 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026273 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070026274 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026275 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026276 }
26277 }
26278 }
26279
26280 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_strided_cn) {
26281 TEST_REQUIRES_X86_SSE41;
26282 for (uint32_t n = 5; n < 8; n++) {
26283 for (size_t k = 1; k <= 40; k += 9) {
26284 GemmMicrokernelTester()
26285 .mr(1)
26286 .nr(4)
26287 .kr(8)
26288 .sr(1)
26289 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026290 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070026291 .k(k)
26292 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026293 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026294 }
26295 }
26296 }
26297
26298 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_strided_a) {
26299 TEST_REQUIRES_X86_SSE41;
26300 for (uint32_t n = 5; n < 8; n++) {
26301 for (size_t k = 1; k <= 40; k += 9) {
26302 GemmMicrokernelTester()
26303 .mr(1)
26304 .nr(4)
26305 .kr(8)
26306 .sr(1)
26307 .m(1)
26308 .n(n)
26309 .k(k)
26310 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080026311 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026312 }
26313 }
26314 }
26315
26316 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_subtile) {
26317 TEST_REQUIRES_X86_SSE41;
26318 for (uint32_t n = 5; n < 8; n++) {
26319 for (size_t k = 1; k <= 40; k += 9) {
26320 for (uint32_t m = 1; m <= 1; m++) {
26321 GemmMicrokernelTester()
26322 .mr(1)
26323 .nr(4)
26324 .kr(8)
26325 .sr(1)
26326 .m(m)
26327 .n(n)
26328 .k(k)
26329 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026330 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026331 }
26332 }
26333 }
26334 }
26335
26336 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4) {
26337 TEST_REQUIRES_X86_SSE41;
26338 for (uint32_t n = 8; n <= 12; n += 4) {
26339 for (size_t k = 1; k <= 40; k += 9) {
26340 GemmMicrokernelTester()
26341 .mr(1)
26342 .nr(4)
26343 .kr(8)
26344 .sr(1)
26345 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026346 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070026347 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026348 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026349 }
26350 }
26351 }
26352
26353 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_strided_cn) {
26354 TEST_REQUIRES_X86_SSE41;
26355 for (uint32_t n = 8; n <= 12; n += 4) {
26356 for (size_t k = 1; k <= 40; k += 9) {
26357 GemmMicrokernelTester()
26358 .mr(1)
26359 .nr(4)
26360 .kr(8)
26361 .sr(1)
26362 .m(1)
26363 .n(n)
26364 .k(k)
26365 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026366 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026367 }
26368 }
26369 }
26370
26371 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_strided_a) {
26372 TEST_REQUIRES_X86_SSE41;
26373 for (uint32_t n = 8; n <= 12; n += 4) {
26374 for (size_t k = 1; k <= 40; k += 9) {
26375 GemmMicrokernelTester()
26376 .mr(1)
26377 .nr(4)
26378 .kr(8)
26379 .sr(1)
26380 .m(1)
26381 .n(n)
26382 .k(k)
26383 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080026384 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026385 }
26386 }
26387 }
26388
26389 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_subtile) {
26390 TEST_REQUIRES_X86_SSE41;
26391 for (uint32_t n = 8; n <= 12; n += 4) {
26392 for (size_t k = 1; k <= 40; k += 9) {
26393 for (uint32_t m = 1; m <= 1; m++) {
26394 GemmMicrokernelTester()
26395 .mr(1)
26396 .nr(4)
26397 .kr(8)
26398 .sr(1)
26399 .m(m)
26400 .n(n)
26401 .k(k)
26402 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026403 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026404 }
26405 }
26406 }
26407 }
26408
26409 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cm_subtile) {
26410 TEST_REQUIRES_X86_SSE41;
26411 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026412 for (uint32_t n = 1; n <= 4; n++) {
26413 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026414 GemmMicrokernelTester()
26415 .mr(1)
26416 .nr(4)
26417 .kr(8)
26418 .sr(1)
26419 .m(m)
26420 .n(n)
26421 .k(k)
26422 .cm_stride(7)
26423 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026424 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026425 }
26426 }
26427 }
26428 }
26429
26430 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, qmin) {
26431 TEST_REQUIRES_X86_SSE41;
26432 GemmMicrokernelTester()
26433 .mr(1)
26434 .nr(4)
26435 .kr(8)
26436 .sr(1)
26437 .m(1)
26438 .n(4)
26439 .k(8)
26440 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026441 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026442 }
26443
26444 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, qmax) {
26445 TEST_REQUIRES_X86_SSE41;
26446 GemmMicrokernelTester()
26447 .mr(1)
26448 .nr(4)
26449 .kr(8)
26450 .sr(1)
26451 .m(1)
26452 .n(4)
26453 .k(8)
26454 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026455 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026456 }
26457
26458 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cm) {
26459 TEST_REQUIRES_X86_SSE41;
26460 GemmMicrokernelTester()
26461 .mr(1)
26462 .nr(4)
26463 .kr(8)
26464 .sr(1)
26465 .m(1)
26466 .n(4)
26467 .k(8)
26468 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026469 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026470 }
26471#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26472
26473
26474#if XNN_ARCH_X86 || XNN_ARCH_X86_64
26475 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8) {
26476 TEST_REQUIRES_X86_SSE41;
26477 GemmMicrokernelTester()
26478 .mr(2)
26479 .nr(4)
26480 .kr(8)
26481 .sr(1)
26482 .m(2)
26483 .n(4)
26484 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080026485 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026486 }
26487
26488 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cn) {
26489 TEST_REQUIRES_X86_SSE41;
26490 GemmMicrokernelTester()
26491 .mr(2)
26492 .nr(4)
26493 .kr(8)
26494 .sr(1)
26495 .m(2)
26496 .n(4)
26497 .k(8)
26498 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026499 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026500 }
26501
26502 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_strided_a) {
26503 TEST_REQUIRES_X86_SSE41;
26504 GemmMicrokernelTester()
26505 .mr(2)
26506 .nr(4)
26507 .kr(8)
26508 .sr(1)
26509 .m(2)
26510 .n(4)
26511 .k(8)
26512 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080026513 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026514 }
26515
26516 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile) {
26517 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080026518 for (uint32_t n = 1; n <= 4; n++) {
26519 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026520 GemmMicrokernelTester()
26521 .mr(2)
26522 .nr(4)
26523 .kr(8)
26524 .sr(1)
26525 .m(m)
26526 .n(n)
26527 .k(8)
26528 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026529 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026530 }
26531 }
26532 }
26533
26534 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile_m) {
26535 TEST_REQUIRES_X86_SSE41;
26536 for (uint32_t m = 1; m <= 2; m++) {
26537 GemmMicrokernelTester()
26538 .mr(2)
26539 .nr(4)
26540 .kr(8)
26541 .sr(1)
26542 .m(m)
26543 .n(4)
26544 .k(8)
26545 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026546 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026547 }
26548 }
26549
26550 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile_n) {
26551 TEST_REQUIRES_X86_SSE41;
26552 for (uint32_t n = 1; n <= 4; n++) {
26553 GemmMicrokernelTester()
26554 .mr(2)
26555 .nr(4)
26556 .kr(8)
26557 .sr(1)
26558 .m(2)
26559 .n(n)
26560 .k(8)
26561 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026562 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026563 }
26564 }
26565
26566 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_lt_8) {
26567 TEST_REQUIRES_X86_SSE41;
26568 for (size_t k = 1; k < 8; k++) {
26569 GemmMicrokernelTester()
26570 .mr(2)
26571 .nr(4)
26572 .kr(8)
26573 .sr(1)
26574 .m(2)
26575 .n(4)
26576 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026577 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026578 }
26579 }
26580
26581 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_lt_8_strided_a) {
26582 TEST_REQUIRES_X86_SSE41;
26583 for (size_t k = 1; k < 8; k++) {
26584 GemmMicrokernelTester()
26585 .mr(2)
26586 .nr(4)
26587 .kr(8)
26588 .sr(1)
26589 .m(2)
26590 .n(4)
26591 .k(k)
26592 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080026593 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026594 }
26595 }
26596
26597 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_lt_8_subtile) {
26598 TEST_REQUIRES_X86_SSE41;
26599 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026600 for (uint32_t n = 1; n <= 4; n++) {
26601 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026602 GemmMicrokernelTester()
26603 .mr(2)
26604 .nr(4)
26605 .kr(8)
26606 .sr(1)
26607 .m(m)
26608 .n(n)
26609 .k(k)
26610 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026611 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026612 }
26613 }
26614 }
26615 }
26616
26617 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_gt_8) {
26618 TEST_REQUIRES_X86_SSE41;
26619 for (size_t k = 9; k < 16; k++) {
26620 GemmMicrokernelTester()
26621 .mr(2)
26622 .nr(4)
26623 .kr(8)
26624 .sr(1)
26625 .m(2)
26626 .n(4)
26627 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026628 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026629 }
26630 }
26631
26632 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_gt_8_strided_a) {
26633 TEST_REQUIRES_X86_SSE41;
26634 for (size_t k = 9; k < 16; k++) {
26635 GemmMicrokernelTester()
26636 .mr(2)
26637 .nr(4)
26638 .kr(8)
26639 .sr(1)
26640 .m(2)
26641 .n(4)
26642 .k(k)
26643 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080026644 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026645 }
26646 }
26647
26648 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_gt_8_subtile) {
26649 TEST_REQUIRES_X86_SSE41;
26650 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026651 for (uint32_t n = 1; n <= 4; n++) {
26652 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026653 GemmMicrokernelTester()
26654 .mr(2)
26655 .nr(4)
26656 .kr(8)
26657 .sr(1)
26658 .m(m)
26659 .n(n)
26660 .k(k)
26661 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026662 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026663 }
26664 }
26665 }
26666 }
26667
26668 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_div_8) {
26669 TEST_REQUIRES_X86_SSE41;
26670 for (size_t k = 16; k <= 80; k += 8) {
26671 GemmMicrokernelTester()
26672 .mr(2)
26673 .nr(4)
26674 .kr(8)
26675 .sr(1)
26676 .m(2)
26677 .n(4)
26678 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026679 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026680 }
26681 }
26682
26683 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_div_8_strided_a) {
26684 TEST_REQUIRES_X86_SSE41;
26685 for (size_t k = 16; k <= 80; k += 8) {
26686 GemmMicrokernelTester()
26687 .mr(2)
26688 .nr(4)
26689 .kr(8)
26690 .sr(1)
26691 .m(2)
26692 .n(4)
26693 .k(k)
26694 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080026695 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026696 }
26697 }
26698
26699 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_div_8_subtile) {
26700 TEST_REQUIRES_X86_SSE41;
26701 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026702 for (uint32_t n = 1; n <= 4; n++) {
26703 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026704 GemmMicrokernelTester()
26705 .mr(2)
26706 .nr(4)
26707 .kr(8)
26708 .sr(1)
26709 .m(m)
26710 .n(n)
26711 .k(k)
26712 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026713 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026714 }
26715 }
26716 }
26717 }
26718
26719 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4) {
26720 TEST_REQUIRES_X86_SSE41;
26721 for (uint32_t n = 5; n < 8; n++) {
26722 for (size_t k = 1; k <= 40; k += 9) {
26723 GemmMicrokernelTester()
26724 .mr(2)
26725 .nr(4)
26726 .kr(8)
26727 .sr(1)
26728 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026729 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070026730 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026731 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026732 }
26733 }
26734 }
26735
26736 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_strided_cn) {
26737 TEST_REQUIRES_X86_SSE41;
26738 for (uint32_t n = 5; n < 8; n++) {
26739 for (size_t k = 1; k <= 40; k += 9) {
26740 GemmMicrokernelTester()
26741 .mr(2)
26742 .nr(4)
26743 .kr(8)
26744 .sr(1)
26745 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026746 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070026747 .k(k)
26748 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026749 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026750 }
26751 }
26752 }
26753
26754 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_strided_a) {
26755 TEST_REQUIRES_X86_SSE41;
26756 for (uint32_t n = 5; n < 8; n++) {
26757 for (size_t k = 1; k <= 40; k += 9) {
26758 GemmMicrokernelTester()
26759 .mr(2)
26760 .nr(4)
26761 .kr(8)
26762 .sr(1)
26763 .m(2)
26764 .n(n)
26765 .k(k)
26766 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080026767 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026768 }
26769 }
26770 }
26771
26772 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_subtile) {
26773 TEST_REQUIRES_X86_SSE41;
26774 for (uint32_t n = 5; n < 8; n++) {
26775 for (size_t k = 1; k <= 40; k += 9) {
26776 for (uint32_t m = 1; m <= 2; m++) {
26777 GemmMicrokernelTester()
26778 .mr(2)
26779 .nr(4)
26780 .kr(8)
26781 .sr(1)
26782 .m(m)
26783 .n(n)
26784 .k(k)
26785 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026786 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026787 }
26788 }
26789 }
26790 }
26791
26792 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4) {
26793 TEST_REQUIRES_X86_SSE41;
26794 for (uint32_t n = 8; n <= 12; n += 4) {
26795 for (size_t k = 1; k <= 40; k += 9) {
26796 GemmMicrokernelTester()
26797 .mr(2)
26798 .nr(4)
26799 .kr(8)
26800 .sr(1)
26801 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026802 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070026803 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026804 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026805 }
26806 }
26807 }
26808
26809 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_strided_cn) {
26810 TEST_REQUIRES_X86_SSE41;
26811 for (uint32_t n = 8; n <= 12; n += 4) {
26812 for (size_t k = 1; k <= 40; k += 9) {
26813 GemmMicrokernelTester()
26814 .mr(2)
26815 .nr(4)
26816 .kr(8)
26817 .sr(1)
26818 .m(2)
26819 .n(n)
26820 .k(k)
26821 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026822 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026823 }
26824 }
26825 }
26826
26827 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_strided_a) {
26828 TEST_REQUIRES_X86_SSE41;
26829 for (uint32_t n = 8; n <= 12; n += 4) {
26830 for (size_t k = 1; k <= 40; k += 9) {
26831 GemmMicrokernelTester()
26832 .mr(2)
26833 .nr(4)
26834 .kr(8)
26835 .sr(1)
26836 .m(2)
26837 .n(n)
26838 .k(k)
26839 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080026840 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026841 }
26842 }
26843 }
26844
26845 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_subtile) {
26846 TEST_REQUIRES_X86_SSE41;
26847 for (uint32_t n = 8; n <= 12; n += 4) {
26848 for (size_t k = 1; k <= 40; k += 9) {
26849 for (uint32_t m = 1; m <= 2; m++) {
26850 GemmMicrokernelTester()
26851 .mr(2)
26852 .nr(4)
26853 .kr(8)
26854 .sr(1)
26855 .m(m)
26856 .n(n)
26857 .k(k)
26858 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026859 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026860 }
26861 }
26862 }
26863 }
26864
26865 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cm_subtile) {
26866 TEST_REQUIRES_X86_SSE41;
26867 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026868 for (uint32_t n = 1; n <= 4; n++) {
26869 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026870 GemmMicrokernelTester()
26871 .mr(2)
26872 .nr(4)
26873 .kr(8)
26874 .sr(1)
26875 .m(m)
26876 .n(n)
26877 .k(k)
26878 .cm_stride(7)
26879 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026880 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026881 }
26882 }
26883 }
26884 }
26885
26886 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, qmin) {
26887 TEST_REQUIRES_X86_SSE41;
26888 GemmMicrokernelTester()
26889 .mr(2)
26890 .nr(4)
26891 .kr(8)
26892 .sr(1)
26893 .m(2)
26894 .n(4)
26895 .k(8)
26896 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026897 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026898 }
26899
26900 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, qmax) {
26901 TEST_REQUIRES_X86_SSE41;
26902 GemmMicrokernelTester()
26903 .mr(2)
26904 .nr(4)
26905 .kr(8)
26906 .sr(1)
26907 .m(2)
26908 .n(4)
26909 .k(8)
26910 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026911 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026912 }
26913
26914 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cm) {
26915 TEST_REQUIRES_X86_SSE41;
26916 GemmMicrokernelTester()
26917 .mr(2)
26918 .nr(4)
26919 .kr(8)
26920 .sr(1)
26921 .m(2)
26922 .n(4)
26923 .k(8)
26924 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026925 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026926 }
26927#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26928
26929
26930#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanc46e6712021-06-01 19:00:16 -070026931 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8) {
26932 TEST_REQUIRES_X86_AVX;
26933 GemmMicrokernelTester()
26934 .mr(3)
26935 .nr(4)
26936 .kr(8)
26937 .sr(1)
26938 .m(3)
26939 .n(4)
26940 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080026941 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026942 }
26943
26944 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, strided_cn) {
26945 TEST_REQUIRES_X86_AVX;
26946 GemmMicrokernelTester()
26947 .mr(3)
26948 .nr(4)
26949 .kr(8)
26950 .sr(1)
26951 .m(3)
26952 .n(4)
26953 .k(8)
26954 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026955 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026956 }
26957
26958 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8_strided_a) {
26959 TEST_REQUIRES_X86_AVX;
26960 GemmMicrokernelTester()
26961 .mr(3)
26962 .nr(4)
26963 .kr(8)
26964 .sr(1)
26965 .m(3)
26966 .n(4)
26967 .k(8)
26968 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080026969 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026970 }
26971
26972 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8_subtile) {
26973 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080026974 for (uint32_t n = 1; n <= 4; n++) {
26975 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070026976 GemmMicrokernelTester()
26977 .mr(3)
26978 .nr(4)
26979 .kr(8)
26980 .sr(1)
26981 .m(m)
26982 .n(n)
26983 .k(8)
26984 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026985 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070026986 }
26987 }
26988 }
26989
26990 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8_subtile_m) {
26991 TEST_REQUIRES_X86_AVX;
26992 for (uint32_t m = 1; m <= 3; m++) {
26993 GemmMicrokernelTester()
26994 .mr(3)
26995 .nr(4)
26996 .kr(8)
26997 .sr(1)
26998 .m(m)
26999 .n(4)
27000 .k(8)
27001 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027002 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027003 }
27004 }
27005
27006 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8_subtile_n) {
27007 TEST_REQUIRES_X86_AVX;
27008 for (uint32_t n = 1; n <= 4; n++) {
27009 GemmMicrokernelTester()
27010 .mr(3)
27011 .nr(4)
27012 .kr(8)
27013 .sr(1)
27014 .m(3)
27015 .n(n)
27016 .k(8)
27017 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027018 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027019 }
27020 }
27021
27022 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_lt_8) {
27023 TEST_REQUIRES_X86_AVX;
27024 for (size_t k = 1; k < 8; k++) {
27025 GemmMicrokernelTester()
27026 .mr(3)
27027 .nr(4)
27028 .kr(8)
27029 .sr(1)
27030 .m(3)
27031 .n(4)
27032 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027033 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027034 }
27035 }
27036
27037 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_lt_8_strided_a) {
27038 TEST_REQUIRES_X86_AVX;
27039 for (size_t k = 1; k < 8; k++) {
27040 GemmMicrokernelTester()
27041 .mr(3)
27042 .nr(4)
27043 .kr(8)
27044 .sr(1)
27045 .m(3)
27046 .n(4)
27047 .k(k)
27048 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080027049 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027050 }
27051 }
27052
27053 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_lt_8_subtile) {
27054 TEST_REQUIRES_X86_AVX;
27055 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027056 for (uint32_t n = 1; n <= 4; n++) {
27057 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027058 GemmMicrokernelTester()
27059 .mr(3)
27060 .nr(4)
27061 .kr(8)
27062 .sr(1)
27063 .m(m)
27064 .n(n)
27065 .k(k)
27066 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027067 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027068 }
27069 }
27070 }
27071 }
27072
27073 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_gt_8) {
27074 TEST_REQUIRES_X86_AVX;
27075 for (size_t k = 9; k < 16; k++) {
27076 GemmMicrokernelTester()
27077 .mr(3)
27078 .nr(4)
27079 .kr(8)
27080 .sr(1)
27081 .m(3)
27082 .n(4)
27083 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027084 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027085 }
27086 }
27087
27088 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_gt_8_strided_a) {
27089 TEST_REQUIRES_X86_AVX;
27090 for (size_t k = 9; k < 16; k++) {
27091 GemmMicrokernelTester()
27092 .mr(3)
27093 .nr(4)
27094 .kr(8)
27095 .sr(1)
27096 .m(3)
27097 .n(4)
27098 .k(k)
27099 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080027100 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027101 }
27102 }
27103
27104 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_gt_8_subtile) {
27105 TEST_REQUIRES_X86_AVX;
27106 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027107 for (uint32_t n = 1; n <= 4; n++) {
27108 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027109 GemmMicrokernelTester()
27110 .mr(3)
27111 .nr(4)
27112 .kr(8)
27113 .sr(1)
27114 .m(m)
27115 .n(n)
27116 .k(k)
27117 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027118 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027119 }
27120 }
27121 }
27122 }
27123
27124 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_div_8) {
27125 TEST_REQUIRES_X86_AVX;
27126 for (size_t k = 16; k <= 80; k += 8) {
27127 GemmMicrokernelTester()
27128 .mr(3)
27129 .nr(4)
27130 .kr(8)
27131 .sr(1)
27132 .m(3)
27133 .n(4)
27134 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027135 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027136 }
27137 }
27138
27139 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_div_8_strided_a) {
27140 TEST_REQUIRES_X86_AVX;
27141 for (size_t k = 16; k <= 80; k += 8) {
27142 GemmMicrokernelTester()
27143 .mr(3)
27144 .nr(4)
27145 .kr(8)
27146 .sr(1)
27147 .m(3)
27148 .n(4)
27149 .k(k)
27150 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080027151 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027152 }
27153 }
27154
27155 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_div_8_subtile) {
27156 TEST_REQUIRES_X86_AVX;
27157 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027158 for (uint32_t n = 1; n <= 4; n++) {
27159 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027160 GemmMicrokernelTester()
27161 .mr(3)
27162 .nr(4)
27163 .kr(8)
27164 .sr(1)
27165 .m(m)
27166 .n(n)
27167 .k(k)
27168 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027169 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027170 }
27171 }
27172 }
27173 }
27174
27175 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4) {
27176 TEST_REQUIRES_X86_AVX;
27177 for (uint32_t n = 5; n < 8; n++) {
27178 for (size_t k = 1; k <= 40; k += 9) {
27179 GemmMicrokernelTester()
27180 .mr(3)
27181 .nr(4)
27182 .kr(8)
27183 .sr(1)
27184 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027185 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070027186 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027187 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027188 }
27189 }
27190 }
27191
27192 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4_strided_cn) {
27193 TEST_REQUIRES_X86_AVX;
27194 for (uint32_t n = 5; n < 8; n++) {
27195 for (size_t k = 1; k <= 40; k += 9) {
27196 GemmMicrokernelTester()
27197 .mr(3)
27198 .nr(4)
27199 .kr(8)
27200 .sr(1)
27201 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027202 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070027203 .k(k)
27204 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027205 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027206 }
27207 }
27208 }
27209
27210 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4_strided_a) {
27211 TEST_REQUIRES_X86_AVX;
27212 for (uint32_t n = 5; n < 8; n++) {
27213 for (size_t k = 1; k <= 40; k += 9) {
27214 GemmMicrokernelTester()
27215 .mr(3)
27216 .nr(4)
27217 .kr(8)
27218 .sr(1)
27219 .m(3)
27220 .n(n)
27221 .k(k)
27222 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080027223 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027224 }
27225 }
27226 }
27227
27228 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4_subtile) {
27229 TEST_REQUIRES_X86_AVX;
27230 for (uint32_t n = 5; n < 8; n++) {
27231 for (size_t k = 1; k <= 40; k += 9) {
27232 for (uint32_t m = 1; m <= 3; m++) {
27233 GemmMicrokernelTester()
27234 .mr(3)
27235 .nr(4)
27236 .kr(8)
27237 .sr(1)
27238 .m(m)
27239 .n(n)
27240 .k(k)
27241 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027242 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027243 }
27244 }
27245 }
27246 }
27247
27248 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4) {
27249 TEST_REQUIRES_X86_AVX;
27250 for (uint32_t n = 8; n <= 12; n += 4) {
27251 for (size_t k = 1; k <= 40; k += 9) {
27252 GemmMicrokernelTester()
27253 .mr(3)
27254 .nr(4)
27255 .kr(8)
27256 .sr(1)
27257 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027258 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070027259 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027260 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027261 }
27262 }
27263 }
27264
27265 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4_strided_cn) {
27266 TEST_REQUIRES_X86_AVX;
27267 for (uint32_t n = 8; n <= 12; n += 4) {
27268 for (size_t k = 1; k <= 40; k += 9) {
27269 GemmMicrokernelTester()
27270 .mr(3)
27271 .nr(4)
27272 .kr(8)
27273 .sr(1)
27274 .m(3)
27275 .n(n)
27276 .k(k)
27277 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027278 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027279 }
27280 }
27281 }
27282
27283 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4_strided_a) {
27284 TEST_REQUIRES_X86_AVX;
27285 for (uint32_t n = 8; n <= 12; n += 4) {
27286 for (size_t k = 1; k <= 40; k += 9) {
27287 GemmMicrokernelTester()
27288 .mr(3)
27289 .nr(4)
27290 .kr(8)
27291 .sr(1)
27292 .m(3)
27293 .n(n)
27294 .k(k)
27295 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080027296 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027297 }
27298 }
27299 }
27300
27301 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4_subtile) {
27302 TEST_REQUIRES_X86_AVX;
27303 for (uint32_t n = 8; n <= 12; n += 4) {
27304 for (size_t k = 1; k <= 40; k += 9) {
27305 for (uint32_t m = 1; m <= 3; m++) {
27306 GemmMicrokernelTester()
27307 .mr(3)
27308 .nr(4)
27309 .kr(8)
27310 .sr(1)
27311 .m(m)
27312 .n(n)
27313 .k(k)
27314 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027315 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027316 }
27317 }
27318 }
27319 }
27320
27321 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, strided_cm_subtile) {
27322 TEST_REQUIRES_X86_AVX;
27323 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027324 for (uint32_t n = 1; n <= 4; n++) {
27325 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027326 GemmMicrokernelTester()
27327 .mr(3)
27328 .nr(4)
27329 .kr(8)
27330 .sr(1)
27331 .m(m)
27332 .n(n)
27333 .k(k)
27334 .cm_stride(7)
27335 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027336 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027337 }
27338 }
27339 }
27340 }
27341
27342 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, qmin) {
27343 TEST_REQUIRES_X86_AVX;
27344 GemmMicrokernelTester()
27345 .mr(3)
27346 .nr(4)
27347 .kr(8)
27348 .sr(1)
27349 .m(3)
27350 .n(4)
27351 .k(8)
27352 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027353 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027354 }
27355
27356 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, qmax) {
27357 TEST_REQUIRES_X86_AVX;
27358 GemmMicrokernelTester()
27359 .mr(3)
27360 .nr(4)
27361 .kr(8)
27362 .sr(1)
27363 .m(3)
27364 .n(4)
27365 .k(8)
27366 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027367 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027368 }
27369
27370 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__AVX_LD128, strided_cm) {
27371 TEST_REQUIRES_X86_AVX;
27372 GemmMicrokernelTester()
27373 .mr(3)
27374 .nr(4)
27375 .kr(8)
27376 .sr(1)
27377 .m(3)
27378 .n(4)
27379 .k(8)
27380 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027381 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027382 }
27383#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
27384
27385
27386#if XNN_ARCH_X86 || XNN_ARCH_X86_64
27387 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8) {
27388 TEST_REQUIRES_X86_XOP;
27389 GemmMicrokernelTester()
27390 .mr(1)
27391 .nr(4)
27392 .kr(8)
27393 .sr(1)
27394 .m(1)
27395 .n(4)
27396 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080027397 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027398 }
27399
27400 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, strided_cn) {
27401 TEST_REQUIRES_X86_XOP;
27402 GemmMicrokernelTester()
27403 .mr(1)
27404 .nr(4)
27405 .kr(8)
27406 .sr(1)
27407 .m(1)
27408 .n(4)
27409 .k(8)
27410 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027411 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027412 }
27413
27414 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8_strided_a) {
27415 TEST_REQUIRES_X86_XOP;
27416 GemmMicrokernelTester()
27417 .mr(1)
27418 .nr(4)
27419 .kr(8)
27420 .sr(1)
27421 .m(1)
27422 .n(4)
27423 .k(8)
27424 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080027425 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027426 }
27427
27428 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8_subtile) {
27429 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080027430 for (uint32_t n = 1; n <= 4; n++) {
27431 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027432 GemmMicrokernelTester()
27433 .mr(1)
27434 .nr(4)
27435 .kr(8)
27436 .sr(1)
27437 .m(m)
27438 .n(n)
27439 .k(8)
27440 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027441 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027442 }
27443 }
27444 }
27445
27446 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8_subtile_m) {
27447 TEST_REQUIRES_X86_XOP;
27448 for (uint32_t m = 1; m <= 1; m++) {
27449 GemmMicrokernelTester()
27450 .mr(1)
27451 .nr(4)
27452 .kr(8)
27453 .sr(1)
27454 .m(m)
27455 .n(4)
27456 .k(8)
27457 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027458 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027459 }
27460 }
27461
27462 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8_subtile_n) {
27463 TEST_REQUIRES_X86_XOP;
27464 for (uint32_t n = 1; n <= 4; n++) {
27465 GemmMicrokernelTester()
27466 .mr(1)
27467 .nr(4)
27468 .kr(8)
27469 .sr(1)
27470 .m(1)
27471 .n(n)
27472 .k(8)
27473 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027474 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027475 }
27476 }
27477
27478 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_lt_8) {
27479 TEST_REQUIRES_X86_XOP;
27480 for (size_t k = 1; k < 8; k++) {
27481 GemmMicrokernelTester()
27482 .mr(1)
27483 .nr(4)
27484 .kr(8)
27485 .sr(1)
27486 .m(1)
27487 .n(4)
27488 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027489 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027490 }
27491 }
27492
27493 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_lt_8_strided_a) {
27494 TEST_REQUIRES_X86_XOP;
27495 for (size_t k = 1; k < 8; k++) {
27496 GemmMicrokernelTester()
27497 .mr(1)
27498 .nr(4)
27499 .kr(8)
27500 .sr(1)
27501 .m(1)
27502 .n(4)
27503 .k(k)
27504 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080027505 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027506 }
27507 }
27508
27509 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_lt_8_subtile) {
27510 TEST_REQUIRES_X86_XOP;
27511 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027512 for (uint32_t n = 1; n <= 4; n++) {
27513 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027514 GemmMicrokernelTester()
27515 .mr(1)
27516 .nr(4)
27517 .kr(8)
27518 .sr(1)
27519 .m(m)
27520 .n(n)
27521 .k(k)
27522 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027523 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027524 }
27525 }
27526 }
27527 }
27528
27529 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_gt_8) {
27530 TEST_REQUIRES_X86_XOP;
27531 for (size_t k = 9; k < 16; k++) {
27532 GemmMicrokernelTester()
27533 .mr(1)
27534 .nr(4)
27535 .kr(8)
27536 .sr(1)
27537 .m(1)
27538 .n(4)
27539 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027540 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027541 }
27542 }
27543
27544 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_gt_8_strided_a) {
27545 TEST_REQUIRES_X86_XOP;
27546 for (size_t k = 9; k < 16; k++) {
27547 GemmMicrokernelTester()
27548 .mr(1)
27549 .nr(4)
27550 .kr(8)
27551 .sr(1)
27552 .m(1)
27553 .n(4)
27554 .k(k)
27555 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080027556 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027557 }
27558 }
27559
27560 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_gt_8_subtile) {
27561 TEST_REQUIRES_X86_XOP;
27562 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027563 for (uint32_t n = 1; n <= 4; n++) {
27564 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027565 GemmMicrokernelTester()
27566 .mr(1)
27567 .nr(4)
27568 .kr(8)
27569 .sr(1)
27570 .m(m)
27571 .n(n)
27572 .k(k)
27573 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027574 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027575 }
27576 }
27577 }
27578 }
27579
27580 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_div_8) {
27581 TEST_REQUIRES_X86_XOP;
27582 for (size_t k = 16; k <= 80; k += 8) {
27583 GemmMicrokernelTester()
27584 .mr(1)
27585 .nr(4)
27586 .kr(8)
27587 .sr(1)
27588 .m(1)
27589 .n(4)
27590 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027591 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027592 }
27593 }
27594
27595 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_div_8_strided_a) {
27596 TEST_REQUIRES_X86_XOP;
27597 for (size_t k = 16; k <= 80; k += 8) {
27598 GemmMicrokernelTester()
27599 .mr(1)
27600 .nr(4)
27601 .kr(8)
27602 .sr(1)
27603 .m(1)
27604 .n(4)
27605 .k(k)
27606 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080027607 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027608 }
27609 }
27610
27611 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_div_8_subtile) {
27612 TEST_REQUIRES_X86_XOP;
27613 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027614 for (uint32_t n = 1; n <= 4; n++) {
27615 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027616 GemmMicrokernelTester()
27617 .mr(1)
27618 .nr(4)
27619 .kr(8)
27620 .sr(1)
27621 .m(m)
27622 .n(n)
27623 .k(k)
27624 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027625 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027626 }
27627 }
27628 }
27629 }
27630
27631 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4) {
27632 TEST_REQUIRES_X86_XOP;
27633 for (uint32_t n = 5; n < 8; n++) {
27634 for (size_t k = 1; k <= 40; k += 9) {
27635 GemmMicrokernelTester()
27636 .mr(1)
27637 .nr(4)
27638 .kr(8)
27639 .sr(1)
27640 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027641 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070027642 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027643 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027644 }
27645 }
27646 }
27647
27648 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4_strided_cn) {
27649 TEST_REQUIRES_X86_XOP;
27650 for (uint32_t n = 5; n < 8; n++) {
27651 for (size_t k = 1; k <= 40; k += 9) {
27652 GemmMicrokernelTester()
27653 .mr(1)
27654 .nr(4)
27655 .kr(8)
27656 .sr(1)
27657 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027658 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070027659 .k(k)
27660 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027661 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027662 }
27663 }
27664 }
27665
27666 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4_strided_a) {
27667 TEST_REQUIRES_X86_XOP;
27668 for (uint32_t n = 5; n < 8; n++) {
27669 for (size_t k = 1; k <= 40; k += 9) {
27670 GemmMicrokernelTester()
27671 .mr(1)
27672 .nr(4)
27673 .kr(8)
27674 .sr(1)
27675 .m(1)
27676 .n(n)
27677 .k(k)
27678 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080027679 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027680 }
27681 }
27682 }
27683
27684 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4_subtile) {
27685 TEST_REQUIRES_X86_XOP;
27686 for (uint32_t n = 5; n < 8; n++) {
27687 for (size_t k = 1; k <= 40; k += 9) {
27688 for (uint32_t m = 1; m <= 1; m++) {
27689 GemmMicrokernelTester()
27690 .mr(1)
27691 .nr(4)
27692 .kr(8)
27693 .sr(1)
27694 .m(m)
27695 .n(n)
27696 .k(k)
27697 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027698 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027699 }
27700 }
27701 }
27702 }
27703
27704 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4) {
27705 TEST_REQUIRES_X86_XOP;
27706 for (uint32_t n = 8; n <= 12; n += 4) {
27707 for (size_t k = 1; k <= 40; k += 9) {
27708 GemmMicrokernelTester()
27709 .mr(1)
27710 .nr(4)
27711 .kr(8)
27712 .sr(1)
27713 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027714 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070027715 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027716 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027717 }
27718 }
27719 }
27720
27721 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4_strided_cn) {
27722 TEST_REQUIRES_X86_XOP;
27723 for (uint32_t n = 8; n <= 12; n += 4) {
27724 for (size_t k = 1; k <= 40; k += 9) {
27725 GemmMicrokernelTester()
27726 .mr(1)
27727 .nr(4)
27728 .kr(8)
27729 .sr(1)
27730 .m(1)
27731 .n(n)
27732 .k(k)
27733 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027734 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027735 }
27736 }
27737 }
27738
27739 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4_strided_a) {
27740 TEST_REQUIRES_X86_XOP;
27741 for (uint32_t n = 8; n <= 12; n += 4) {
27742 for (size_t k = 1; k <= 40; k += 9) {
27743 GemmMicrokernelTester()
27744 .mr(1)
27745 .nr(4)
27746 .kr(8)
27747 .sr(1)
27748 .m(1)
27749 .n(n)
27750 .k(k)
27751 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080027752 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027753 }
27754 }
27755 }
27756
27757 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4_subtile) {
27758 TEST_REQUIRES_X86_XOP;
27759 for (uint32_t n = 8; n <= 12; n += 4) {
27760 for (size_t k = 1; k <= 40; k += 9) {
27761 for (uint32_t m = 1; m <= 1; m++) {
27762 GemmMicrokernelTester()
27763 .mr(1)
27764 .nr(4)
27765 .kr(8)
27766 .sr(1)
27767 .m(m)
27768 .n(n)
27769 .k(k)
27770 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027771 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027772 }
27773 }
27774 }
27775 }
27776
27777 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, strided_cm_subtile) {
27778 TEST_REQUIRES_X86_XOP;
27779 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027780 for (uint32_t n = 1; n <= 4; n++) {
27781 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027782 GemmMicrokernelTester()
27783 .mr(1)
27784 .nr(4)
27785 .kr(8)
27786 .sr(1)
27787 .m(m)
27788 .n(n)
27789 .k(k)
27790 .cm_stride(7)
27791 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027792 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027793 }
27794 }
27795 }
27796 }
27797
27798 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, qmin) {
27799 TEST_REQUIRES_X86_XOP;
27800 GemmMicrokernelTester()
27801 .mr(1)
27802 .nr(4)
27803 .kr(8)
27804 .sr(1)
27805 .m(1)
27806 .n(4)
27807 .k(8)
27808 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027809 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027810 }
27811
27812 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, qmax) {
27813 TEST_REQUIRES_X86_XOP;
27814 GemmMicrokernelTester()
27815 .mr(1)
27816 .nr(4)
27817 .kr(8)
27818 .sr(1)
27819 .m(1)
27820 .n(4)
27821 .k(8)
27822 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027823 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027824 }
27825
27826 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__XOP_LD128, strided_cm) {
27827 TEST_REQUIRES_X86_XOP;
27828 GemmMicrokernelTester()
27829 .mr(1)
27830 .nr(4)
27831 .kr(8)
27832 .sr(1)
27833 .m(1)
27834 .n(4)
27835 .k(8)
27836 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027837 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027838 }
27839#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
27840
27841
27842#if XNN_ARCH_X86 || XNN_ARCH_X86_64
27843 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8) {
27844 TEST_REQUIRES_X86_XOP;
27845 GemmMicrokernelTester()
27846 .mr(2)
27847 .nr(4)
27848 .kr(8)
27849 .sr(1)
27850 .m(2)
27851 .n(4)
27852 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080027853 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027854 }
27855
27856 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, strided_cn) {
27857 TEST_REQUIRES_X86_XOP;
27858 GemmMicrokernelTester()
27859 .mr(2)
27860 .nr(4)
27861 .kr(8)
27862 .sr(1)
27863 .m(2)
27864 .n(4)
27865 .k(8)
27866 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027867 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027868 }
27869
27870 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8_strided_a) {
27871 TEST_REQUIRES_X86_XOP;
27872 GemmMicrokernelTester()
27873 .mr(2)
27874 .nr(4)
27875 .kr(8)
27876 .sr(1)
27877 .m(2)
27878 .n(4)
27879 .k(8)
27880 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080027881 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027882 }
27883
27884 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8_subtile) {
27885 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080027886 for (uint32_t n = 1; n <= 4; n++) {
27887 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027888 GemmMicrokernelTester()
27889 .mr(2)
27890 .nr(4)
27891 .kr(8)
27892 .sr(1)
27893 .m(m)
27894 .n(n)
27895 .k(8)
27896 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027897 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027898 }
27899 }
27900 }
27901
27902 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8_subtile_m) {
27903 TEST_REQUIRES_X86_XOP;
27904 for (uint32_t m = 1; m <= 2; m++) {
27905 GemmMicrokernelTester()
27906 .mr(2)
27907 .nr(4)
27908 .kr(8)
27909 .sr(1)
27910 .m(m)
27911 .n(4)
27912 .k(8)
27913 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027914 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027915 }
27916 }
27917
27918 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8_subtile_n) {
27919 TEST_REQUIRES_X86_XOP;
27920 for (uint32_t n = 1; n <= 4; n++) {
27921 GemmMicrokernelTester()
27922 .mr(2)
27923 .nr(4)
27924 .kr(8)
27925 .sr(1)
27926 .m(2)
27927 .n(n)
27928 .k(8)
27929 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027930 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027931 }
27932 }
27933
27934 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_lt_8) {
27935 TEST_REQUIRES_X86_XOP;
27936 for (size_t k = 1; k < 8; k++) {
27937 GemmMicrokernelTester()
27938 .mr(2)
27939 .nr(4)
27940 .kr(8)
27941 .sr(1)
27942 .m(2)
27943 .n(4)
27944 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027945 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027946 }
27947 }
27948
27949 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_lt_8_strided_a) {
27950 TEST_REQUIRES_X86_XOP;
27951 for (size_t k = 1; k < 8; k++) {
27952 GemmMicrokernelTester()
27953 .mr(2)
27954 .nr(4)
27955 .kr(8)
27956 .sr(1)
27957 .m(2)
27958 .n(4)
27959 .k(k)
27960 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080027961 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027962 }
27963 }
27964
27965 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_lt_8_subtile) {
27966 TEST_REQUIRES_X86_XOP;
27967 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027968 for (uint32_t n = 1; n <= 4; n++) {
27969 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070027970 GemmMicrokernelTester()
27971 .mr(2)
27972 .nr(4)
27973 .kr(8)
27974 .sr(1)
27975 .m(m)
27976 .n(n)
27977 .k(k)
27978 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027979 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027980 }
27981 }
27982 }
27983 }
27984
27985 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_gt_8) {
27986 TEST_REQUIRES_X86_XOP;
27987 for (size_t k = 9; k < 16; k++) {
27988 GemmMicrokernelTester()
27989 .mr(2)
27990 .nr(4)
27991 .kr(8)
27992 .sr(1)
27993 .m(2)
27994 .n(4)
27995 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027996 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070027997 }
27998 }
27999
28000 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_gt_8_strided_a) {
28001 TEST_REQUIRES_X86_XOP;
28002 for (size_t k = 9; k < 16; k++) {
28003 GemmMicrokernelTester()
28004 .mr(2)
28005 .nr(4)
28006 .kr(8)
28007 .sr(1)
28008 .m(2)
28009 .n(4)
28010 .k(k)
28011 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080028012 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028013 }
28014 }
28015
28016 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_gt_8_subtile) {
28017 TEST_REQUIRES_X86_XOP;
28018 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028019 for (uint32_t n = 1; n <= 4; n++) {
28020 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028021 GemmMicrokernelTester()
28022 .mr(2)
28023 .nr(4)
28024 .kr(8)
28025 .sr(1)
28026 .m(m)
28027 .n(n)
28028 .k(k)
28029 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028030 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028031 }
28032 }
28033 }
28034 }
28035
28036 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_div_8) {
28037 TEST_REQUIRES_X86_XOP;
28038 for (size_t k = 16; k <= 80; k += 8) {
28039 GemmMicrokernelTester()
28040 .mr(2)
28041 .nr(4)
28042 .kr(8)
28043 .sr(1)
28044 .m(2)
28045 .n(4)
28046 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028047 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028048 }
28049 }
28050
28051 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_div_8_strided_a) {
28052 TEST_REQUIRES_X86_XOP;
28053 for (size_t k = 16; k <= 80; k += 8) {
28054 GemmMicrokernelTester()
28055 .mr(2)
28056 .nr(4)
28057 .kr(8)
28058 .sr(1)
28059 .m(2)
28060 .n(4)
28061 .k(k)
28062 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080028063 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028064 }
28065 }
28066
28067 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_div_8_subtile) {
28068 TEST_REQUIRES_X86_XOP;
28069 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028070 for (uint32_t n = 1; n <= 4; n++) {
28071 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028072 GemmMicrokernelTester()
28073 .mr(2)
28074 .nr(4)
28075 .kr(8)
28076 .sr(1)
28077 .m(m)
28078 .n(n)
28079 .k(k)
28080 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028081 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028082 }
28083 }
28084 }
28085 }
28086
28087 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4) {
28088 TEST_REQUIRES_X86_XOP;
28089 for (uint32_t n = 5; n < 8; n++) {
28090 for (size_t k = 1; k <= 40; k += 9) {
28091 GemmMicrokernelTester()
28092 .mr(2)
28093 .nr(4)
28094 .kr(8)
28095 .sr(1)
28096 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028097 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070028098 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028099 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028100 }
28101 }
28102 }
28103
28104 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4_strided_cn) {
28105 TEST_REQUIRES_X86_XOP;
28106 for (uint32_t n = 5; n < 8; n++) {
28107 for (size_t k = 1; k <= 40; k += 9) {
28108 GemmMicrokernelTester()
28109 .mr(2)
28110 .nr(4)
28111 .kr(8)
28112 .sr(1)
28113 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028114 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070028115 .k(k)
28116 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080028117 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028118 }
28119 }
28120 }
28121
28122 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4_strided_a) {
28123 TEST_REQUIRES_X86_XOP;
28124 for (uint32_t n = 5; n < 8; n++) {
28125 for (size_t k = 1; k <= 40; k += 9) {
28126 GemmMicrokernelTester()
28127 .mr(2)
28128 .nr(4)
28129 .kr(8)
28130 .sr(1)
28131 .m(2)
28132 .n(n)
28133 .k(k)
28134 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080028135 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028136 }
28137 }
28138 }
28139
28140 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4_subtile) {
28141 TEST_REQUIRES_X86_XOP;
28142 for (uint32_t n = 5; n < 8; n++) {
28143 for (size_t k = 1; k <= 40; k += 9) {
28144 for (uint32_t m = 1; m <= 2; m++) {
28145 GemmMicrokernelTester()
28146 .mr(2)
28147 .nr(4)
28148 .kr(8)
28149 .sr(1)
28150 .m(m)
28151 .n(n)
28152 .k(k)
28153 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028154 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028155 }
28156 }
28157 }
28158 }
28159
28160 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4) {
28161 TEST_REQUIRES_X86_XOP;
28162 for (uint32_t n = 8; n <= 12; n += 4) {
28163 for (size_t k = 1; k <= 40; k += 9) {
28164 GemmMicrokernelTester()
28165 .mr(2)
28166 .nr(4)
28167 .kr(8)
28168 .sr(1)
28169 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028170 .n(n)
Marat Dukhanc46e6712021-06-01 19:00:16 -070028171 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028172 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028173 }
28174 }
28175 }
28176
28177 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4_strided_cn) {
28178 TEST_REQUIRES_X86_XOP;
28179 for (uint32_t n = 8; n <= 12; n += 4) {
28180 for (size_t k = 1; k <= 40; k += 9) {
28181 GemmMicrokernelTester()
28182 .mr(2)
28183 .nr(4)
28184 .kr(8)
28185 .sr(1)
28186 .m(2)
28187 .n(n)
28188 .k(k)
28189 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080028190 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028191 }
28192 }
28193 }
28194
28195 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4_strided_a) {
28196 TEST_REQUIRES_X86_XOP;
28197 for (uint32_t n = 8; n <= 12; n += 4) {
28198 for (size_t k = 1; k <= 40; k += 9) {
28199 GemmMicrokernelTester()
28200 .mr(2)
28201 .nr(4)
28202 .kr(8)
28203 .sr(1)
28204 .m(2)
28205 .n(n)
28206 .k(k)
28207 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080028208 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028209 }
28210 }
28211 }
28212
28213 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4_subtile) {
28214 TEST_REQUIRES_X86_XOP;
28215 for (uint32_t n = 8; n <= 12; n += 4) {
28216 for (size_t k = 1; k <= 40; k += 9) {
28217 for (uint32_t m = 1; m <= 2; m++) {
28218 GemmMicrokernelTester()
28219 .mr(2)
28220 .nr(4)
28221 .kr(8)
28222 .sr(1)
28223 .m(m)
28224 .n(n)
28225 .k(k)
28226 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028227 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028228 }
28229 }
28230 }
28231 }
28232
28233 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, strided_cm_subtile) {
28234 TEST_REQUIRES_X86_XOP;
28235 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028236 for (uint32_t n = 1; n <= 4; n++) {
28237 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanc46e6712021-06-01 19:00:16 -070028238 GemmMicrokernelTester()
28239 .mr(2)
28240 .nr(4)
28241 .kr(8)
28242 .sr(1)
28243 .m(m)
28244 .n(n)
28245 .k(k)
28246 .cm_stride(7)
28247 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028248 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028249 }
28250 }
28251 }
28252 }
28253
28254 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, qmin) {
28255 TEST_REQUIRES_X86_XOP;
28256 GemmMicrokernelTester()
28257 .mr(2)
28258 .nr(4)
28259 .kr(8)
28260 .sr(1)
28261 .m(2)
28262 .n(4)
28263 .k(8)
28264 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080028265 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028266 }
28267
28268 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, qmax) {
28269 TEST_REQUIRES_X86_XOP;
28270 GemmMicrokernelTester()
28271 .mr(2)
28272 .nr(4)
28273 .kr(8)
28274 .sr(1)
28275 .m(2)
28276 .n(4)
28277 .k(8)
28278 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080028279 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028280 }
28281
28282 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__XOP_LD128, strided_cm) {
28283 TEST_REQUIRES_X86_XOP;
28284 GemmMicrokernelTester()
28285 .mr(2)
28286 .nr(4)
28287 .kr(8)
28288 .sr(1)
28289 .m(2)
28290 .n(4)
28291 .k(8)
28292 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080028293 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhanc46e6712021-06-01 19:00:16 -070028294 }
28295#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
28296
28297
28298#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan0ff79892021-08-06 16:05:06 -070028299 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__SSE2, k_eq_8) {
28300 TEST_REQUIRES_X86_SSE2;
28301 GemmMicrokernelTester()
28302 .extended_weights(true)
28303 .mr(2)
28304 .nr(4)
28305 .kr(2)
28306 .sr(1)
28307 .m(2)
28308 .n(4)
28309 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080028310 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028311 }
28312
28313 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__SSE2, strided_cn) {
28314 TEST_REQUIRES_X86_SSE2;
28315 GemmMicrokernelTester()
28316 .extended_weights(true)
28317 .mr(2)
28318 .nr(4)
28319 .kr(2)
28320 .sr(1)
28321 .m(2)
28322 .n(4)
28323 .k(8)
28324 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080028325 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028326 }
28327
28328 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__SSE2, k_eq_8_strided_a) {
28329 TEST_REQUIRES_X86_SSE2;
28330 GemmMicrokernelTester()
28331 .extended_weights(true)
28332 .mr(2)
28333 .nr(4)
28334 .kr(2)
28335 .sr(1)
28336 .m(2)
28337 .n(4)
28338 .k(8)
28339 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080028340 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028341 }
28342
28343 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__SSE2, k_eq_8_subtile) {
28344 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080028345 for (uint32_t n = 1; n <= 4; n++) {
28346 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070028347 GemmMicrokernelTester()
28348 .extended_weights(true)
28349 .mr(2)
28350 .nr(4)
28351 .kr(2)
28352 .sr(1)
28353 .m(m)
28354 .n(n)
28355 .k(8)
28356 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028357 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028358 }
28359 }
28360 }
28361
28362 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__SSE2, k_eq_8_subtile_m) {
28363 TEST_REQUIRES_X86_SSE2;
28364 for (uint32_t m = 1; m <= 2; m++) {
28365 GemmMicrokernelTester()
28366 .extended_weights(true)
28367 .mr(2)
28368 .nr(4)
28369 .kr(2)
28370 .sr(1)
28371 .m(m)
28372 .n(4)
28373 .k(8)
28374 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028375 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028376 }
28377 }
28378
28379 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__SSE2, k_eq_8_subtile_n) {
28380 TEST_REQUIRES_X86_SSE2;
28381 for (uint32_t n = 1; n <= 4; n++) {
28382 GemmMicrokernelTester()
28383 .extended_weights(true)
28384 .mr(2)
28385 .nr(4)
28386 .kr(2)
28387 .sr(1)
28388 .m(2)
28389 .n(n)
28390 .k(8)
28391 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028392 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028393 }
28394 }
28395
28396 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__SSE2, k_lt_8) {
28397 TEST_REQUIRES_X86_SSE2;
28398 for (size_t k = 1; k < 8; k++) {
28399 GemmMicrokernelTester()
28400 .extended_weights(true)
28401 .mr(2)
28402 .nr(4)
28403 .kr(2)
28404 .sr(1)
28405 .m(2)
28406 .n(4)
28407 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028408 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028409 }
28410 }
28411
28412 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__SSE2, k_lt_8_strided_a) {
28413 TEST_REQUIRES_X86_SSE2;
28414 for (size_t k = 1; k < 8; k++) {
28415 GemmMicrokernelTester()
28416 .extended_weights(true)
28417 .mr(2)
28418 .nr(4)
28419 .kr(2)
28420 .sr(1)
28421 .m(2)
28422 .n(4)
28423 .k(k)
28424 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080028425 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028426 }
28427 }
28428
28429 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__SSE2, k_lt_8_subtile) {
28430 TEST_REQUIRES_X86_SSE2;
28431 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028432 for (uint32_t n = 1; n <= 4; n++) {
28433 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070028434 GemmMicrokernelTester()
28435 .extended_weights(true)
28436 .mr(2)
28437 .nr(4)
28438 .kr(2)
28439 .sr(1)
28440 .m(m)
28441 .n(n)
28442 .k(k)
28443 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028444 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028445 }
28446 }
28447 }
28448 }
28449
28450 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__SSE2, k_gt_8) {
28451 TEST_REQUIRES_X86_SSE2;
28452 for (size_t k = 9; k < 16; k++) {
28453 GemmMicrokernelTester()
28454 .extended_weights(true)
28455 .mr(2)
28456 .nr(4)
28457 .kr(2)
28458 .sr(1)
28459 .m(2)
28460 .n(4)
28461 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028462 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028463 }
28464 }
28465
28466 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__SSE2, k_gt_8_strided_a) {
28467 TEST_REQUIRES_X86_SSE2;
28468 for (size_t k = 9; k < 16; k++) {
28469 GemmMicrokernelTester()
28470 .extended_weights(true)
28471 .mr(2)
28472 .nr(4)
28473 .kr(2)
28474 .sr(1)
28475 .m(2)
28476 .n(4)
28477 .k(k)
28478 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080028479 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028480 }
28481 }
28482
28483 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__SSE2, k_gt_8_subtile) {
28484 TEST_REQUIRES_X86_SSE2;
28485 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028486 for (uint32_t n = 1; n <= 4; n++) {
28487 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070028488 GemmMicrokernelTester()
28489 .extended_weights(true)
28490 .mr(2)
28491 .nr(4)
28492 .kr(2)
28493 .sr(1)
28494 .m(m)
28495 .n(n)
28496 .k(k)
28497 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028498 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028499 }
28500 }
28501 }
28502 }
28503
28504 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__SSE2, k_div_8) {
28505 TEST_REQUIRES_X86_SSE2;
28506 for (size_t k = 16; k <= 80; k += 8) {
28507 GemmMicrokernelTester()
28508 .extended_weights(true)
28509 .mr(2)
28510 .nr(4)
28511 .kr(2)
28512 .sr(1)
28513 .m(2)
28514 .n(4)
28515 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028516 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028517 }
28518 }
28519
28520 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__SSE2, k_div_8_strided_a) {
28521 TEST_REQUIRES_X86_SSE2;
28522 for (size_t k = 16; k <= 80; k += 8) {
28523 GemmMicrokernelTester()
28524 .extended_weights(true)
28525 .mr(2)
28526 .nr(4)
28527 .kr(2)
28528 .sr(1)
28529 .m(2)
28530 .n(4)
28531 .k(k)
28532 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080028533 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028534 }
28535 }
28536
28537 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__SSE2, k_div_8_subtile) {
28538 TEST_REQUIRES_X86_SSE2;
28539 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028540 for (uint32_t n = 1; n <= 4; n++) {
28541 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070028542 GemmMicrokernelTester()
28543 .extended_weights(true)
28544 .mr(2)
28545 .nr(4)
28546 .kr(2)
28547 .sr(1)
28548 .m(m)
28549 .n(n)
28550 .k(k)
28551 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028552 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028553 }
28554 }
28555 }
28556 }
28557
28558 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__SSE2, n_gt_4) {
28559 TEST_REQUIRES_X86_SSE2;
28560 for (uint32_t n = 5; n < 8; n++) {
28561 for (size_t k = 1; k <= 40; k += 9) {
28562 GemmMicrokernelTester()
28563 .extended_weights(true)
28564 .mr(2)
28565 .nr(4)
28566 .kr(2)
28567 .sr(1)
28568 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028569 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070028570 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028571 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028572 }
28573 }
28574 }
28575
28576 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__SSE2, n_gt_4_strided_cn) {
28577 TEST_REQUIRES_X86_SSE2;
28578 for (uint32_t n = 5; n < 8; n++) {
28579 for (size_t k = 1; k <= 40; k += 9) {
28580 GemmMicrokernelTester()
28581 .extended_weights(true)
28582 .mr(2)
28583 .nr(4)
28584 .kr(2)
28585 .sr(1)
28586 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028587 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070028588 .k(k)
28589 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080028590 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028591 }
28592 }
28593 }
28594
28595 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__SSE2, n_gt_4_strided_a) {
28596 TEST_REQUIRES_X86_SSE2;
28597 for (uint32_t n = 5; n < 8; n++) {
28598 for (size_t k = 1; k <= 40; k += 9) {
28599 GemmMicrokernelTester()
28600 .extended_weights(true)
28601 .mr(2)
28602 .nr(4)
28603 .kr(2)
28604 .sr(1)
28605 .m(2)
28606 .n(n)
28607 .k(k)
28608 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080028609 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028610 }
28611 }
28612 }
28613
28614 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__SSE2, n_gt_4_subtile) {
28615 TEST_REQUIRES_X86_SSE2;
28616 for (uint32_t n = 5; n < 8; n++) {
28617 for (size_t k = 1; k <= 40; k += 9) {
28618 for (uint32_t m = 1; m <= 2; m++) {
28619 GemmMicrokernelTester()
28620 .extended_weights(true)
28621 .mr(2)
28622 .nr(4)
28623 .kr(2)
28624 .sr(1)
28625 .m(m)
28626 .n(n)
28627 .k(k)
28628 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028629 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028630 }
28631 }
28632 }
28633 }
28634
28635 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__SSE2, n_div_4) {
28636 TEST_REQUIRES_X86_SSE2;
28637 for (uint32_t n = 8; n <= 12; n += 4) {
28638 for (size_t k = 1; k <= 40; k += 9) {
28639 GemmMicrokernelTester()
28640 .extended_weights(true)
28641 .mr(2)
28642 .nr(4)
28643 .kr(2)
28644 .sr(1)
28645 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028646 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070028647 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028648 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028649 }
28650 }
28651 }
28652
28653 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__SSE2, n_div_4_strided_cn) {
28654 TEST_REQUIRES_X86_SSE2;
28655 for (uint32_t n = 8; n <= 12; n += 4) {
28656 for (size_t k = 1; k <= 40; k += 9) {
28657 GemmMicrokernelTester()
28658 .extended_weights(true)
28659 .mr(2)
28660 .nr(4)
28661 .kr(2)
28662 .sr(1)
28663 .m(2)
28664 .n(n)
28665 .k(k)
28666 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080028667 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028668 }
28669 }
28670 }
28671
28672 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__SSE2, n_div_4_strided_a) {
28673 TEST_REQUIRES_X86_SSE2;
28674 for (uint32_t n = 8; n <= 12; n += 4) {
28675 for (size_t k = 1; k <= 40; k += 9) {
28676 GemmMicrokernelTester()
28677 .extended_weights(true)
28678 .mr(2)
28679 .nr(4)
28680 .kr(2)
28681 .sr(1)
28682 .m(2)
28683 .n(n)
28684 .k(k)
28685 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080028686 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028687 }
28688 }
28689 }
28690
28691 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__SSE2, n_div_4_subtile) {
28692 TEST_REQUIRES_X86_SSE2;
28693 for (uint32_t n = 8; n <= 12; n += 4) {
28694 for (size_t k = 1; k <= 40; k += 9) {
28695 for (uint32_t m = 1; m <= 2; m++) {
28696 GemmMicrokernelTester()
28697 .extended_weights(true)
28698 .mr(2)
28699 .nr(4)
28700 .kr(2)
28701 .sr(1)
28702 .m(m)
28703 .n(n)
28704 .k(k)
28705 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028706 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028707 }
28708 }
28709 }
28710 }
28711
28712 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__SSE2, strided_cm_subtile) {
28713 TEST_REQUIRES_X86_SSE2;
28714 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028715 for (uint32_t n = 1; n <= 4; n++) {
28716 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070028717 GemmMicrokernelTester()
28718 .extended_weights(true)
28719 .mr(2)
28720 .nr(4)
28721 .kr(2)
28722 .sr(1)
28723 .m(m)
28724 .n(n)
28725 .k(k)
28726 .cm_stride(7)
28727 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028728 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028729 }
28730 }
28731 }
28732 }
28733
28734 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__SSE2, strided_cm) {
28735 TEST_REQUIRES_X86_SSE2;
28736 GemmMicrokernelTester()
28737 .extended_weights(true)
28738 .mr(2)
28739 .nr(4)
28740 .kr(2)
28741 .sr(1)
28742 .m(2)
28743 .n(4)
28744 .k(8)
28745 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080028746 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028747 }
28748#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
28749
28750
28751#if XNN_ARCH_X86 || XNN_ARCH_X86_64
28752 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE2, k_eq_8) {
28753 TEST_REQUIRES_X86_SSE2;
28754 GemmMicrokernelTester()
28755 .extended_weights(true)
28756 .mr(3)
28757 .nr(4)
28758 .kr(2)
28759 .sr(1)
28760 .m(3)
28761 .n(4)
28762 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080028763 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028764 }
28765
28766 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE2, strided_cn) {
28767 TEST_REQUIRES_X86_SSE2;
28768 GemmMicrokernelTester()
28769 .extended_weights(true)
28770 .mr(3)
28771 .nr(4)
28772 .kr(2)
28773 .sr(1)
28774 .m(3)
28775 .n(4)
28776 .k(8)
28777 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080028778 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028779 }
28780
28781 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE2, k_eq_8_strided_a) {
28782 TEST_REQUIRES_X86_SSE2;
28783 GemmMicrokernelTester()
28784 .extended_weights(true)
28785 .mr(3)
28786 .nr(4)
28787 .kr(2)
28788 .sr(1)
28789 .m(3)
28790 .n(4)
28791 .k(8)
28792 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080028793 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028794 }
28795
28796 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE2, k_eq_8_subtile) {
28797 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080028798 for (uint32_t n = 1; n <= 4; n++) {
28799 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070028800 GemmMicrokernelTester()
28801 .extended_weights(true)
28802 .mr(3)
28803 .nr(4)
28804 .kr(2)
28805 .sr(1)
28806 .m(m)
28807 .n(n)
28808 .k(8)
28809 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028810 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028811 }
28812 }
28813 }
28814
28815 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE2, k_eq_8_subtile_m) {
28816 TEST_REQUIRES_X86_SSE2;
28817 for (uint32_t m = 1; m <= 3; m++) {
28818 GemmMicrokernelTester()
28819 .extended_weights(true)
28820 .mr(3)
28821 .nr(4)
28822 .kr(2)
28823 .sr(1)
28824 .m(m)
28825 .n(4)
28826 .k(8)
28827 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028828 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028829 }
28830 }
28831
28832 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE2, k_eq_8_subtile_n) {
28833 TEST_REQUIRES_X86_SSE2;
28834 for (uint32_t n = 1; n <= 4; n++) {
28835 GemmMicrokernelTester()
28836 .extended_weights(true)
28837 .mr(3)
28838 .nr(4)
28839 .kr(2)
28840 .sr(1)
28841 .m(3)
28842 .n(n)
28843 .k(8)
28844 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028845 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028846 }
28847 }
28848
28849 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE2, k_lt_8) {
28850 TEST_REQUIRES_X86_SSE2;
28851 for (size_t k = 1; k < 8; k++) {
28852 GemmMicrokernelTester()
28853 .extended_weights(true)
28854 .mr(3)
28855 .nr(4)
28856 .kr(2)
28857 .sr(1)
28858 .m(3)
28859 .n(4)
28860 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028861 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028862 }
28863 }
28864
28865 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE2, k_lt_8_strided_a) {
28866 TEST_REQUIRES_X86_SSE2;
28867 for (size_t k = 1; k < 8; k++) {
28868 GemmMicrokernelTester()
28869 .extended_weights(true)
28870 .mr(3)
28871 .nr(4)
28872 .kr(2)
28873 .sr(1)
28874 .m(3)
28875 .n(4)
28876 .k(k)
28877 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080028878 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028879 }
28880 }
28881
28882 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE2, k_lt_8_subtile) {
28883 TEST_REQUIRES_X86_SSE2;
28884 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028885 for (uint32_t n = 1; n <= 4; n++) {
28886 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070028887 GemmMicrokernelTester()
28888 .extended_weights(true)
28889 .mr(3)
28890 .nr(4)
28891 .kr(2)
28892 .sr(1)
28893 .m(m)
28894 .n(n)
28895 .k(k)
28896 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028897 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028898 }
28899 }
28900 }
28901 }
28902
28903 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE2, k_gt_8) {
28904 TEST_REQUIRES_X86_SSE2;
28905 for (size_t k = 9; k < 16; k++) {
28906 GemmMicrokernelTester()
28907 .extended_weights(true)
28908 .mr(3)
28909 .nr(4)
28910 .kr(2)
28911 .sr(1)
28912 .m(3)
28913 .n(4)
28914 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028915 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028916 }
28917 }
28918
28919 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE2, k_gt_8_strided_a) {
28920 TEST_REQUIRES_X86_SSE2;
28921 for (size_t k = 9; k < 16; k++) {
28922 GemmMicrokernelTester()
28923 .extended_weights(true)
28924 .mr(3)
28925 .nr(4)
28926 .kr(2)
28927 .sr(1)
28928 .m(3)
28929 .n(4)
28930 .k(k)
28931 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080028932 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028933 }
28934 }
28935
28936 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE2, k_gt_8_subtile) {
28937 TEST_REQUIRES_X86_SSE2;
28938 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028939 for (uint32_t n = 1; n <= 4; n++) {
28940 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070028941 GemmMicrokernelTester()
28942 .extended_weights(true)
28943 .mr(3)
28944 .nr(4)
28945 .kr(2)
28946 .sr(1)
28947 .m(m)
28948 .n(n)
28949 .k(k)
28950 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028951 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028952 }
28953 }
28954 }
28955 }
28956
28957 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE2, k_div_8) {
28958 TEST_REQUIRES_X86_SSE2;
28959 for (size_t k = 16; k <= 80; k += 8) {
28960 GemmMicrokernelTester()
28961 .extended_weights(true)
28962 .mr(3)
28963 .nr(4)
28964 .kr(2)
28965 .sr(1)
28966 .m(3)
28967 .n(4)
28968 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028969 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028970 }
28971 }
28972
28973 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE2, k_div_8_strided_a) {
28974 TEST_REQUIRES_X86_SSE2;
28975 for (size_t k = 16; k <= 80; k += 8) {
28976 GemmMicrokernelTester()
28977 .extended_weights(true)
28978 .mr(3)
28979 .nr(4)
28980 .kr(2)
28981 .sr(1)
28982 .m(3)
28983 .n(4)
28984 .k(k)
28985 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080028986 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070028987 }
28988 }
28989
28990 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE2, k_div_8_subtile) {
28991 TEST_REQUIRES_X86_SSE2;
28992 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028993 for (uint32_t n = 1; n <= 4; n++) {
28994 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070028995 GemmMicrokernelTester()
28996 .extended_weights(true)
28997 .mr(3)
28998 .nr(4)
28999 .kr(2)
29000 .sr(1)
29001 .m(m)
29002 .n(n)
29003 .k(k)
29004 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029005 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029006 }
29007 }
29008 }
29009 }
29010
29011 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE2, n_gt_4) {
29012 TEST_REQUIRES_X86_SSE2;
29013 for (uint32_t n = 5; n < 8; n++) {
29014 for (size_t k = 1; k <= 40; k += 9) {
29015 GemmMicrokernelTester()
29016 .extended_weights(true)
29017 .mr(3)
29018 .nr(4)
29019 .kr(2)
29020 .sr(1)
29021 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029022 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070029023 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029024 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029025 }
29026 }
29027 }
29028
29029 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE2, n_gt_4_strided_cn) {
29030 TEST_REQUIRES_X86_SSE2;
29031 for (uint32_t n = 5; n < 8; n++) {
29032 for (size_t k = 1; k <= 40; k += 9) {
29033 GemmMicrokernelTester()
29034 .extended_weights(true)
29035 .mr(3)
29036 .nr(4)
29037 .kr(2)
29038 .sr(1)
29039 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029040 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070029041 .k(k)
29042 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029043 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029044 }
29045 }
29046 }
29047
29048 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE2, n_gt_4_strided_a) {
29049 TEST_REQUIRES_X86_SSE2;
29050 for (uint32_t n = 5; n < 8; n++) {
29051 for (size_t k = 1; k <= 40; k += 9) {
29052 GemmMicrokernelTester()
29053 .extended_weights(true)
29054 .mr(3)
29055 .nr(4)
29056 .kr(2)
29057 .sr(1)
29058 .m(3)
29059 .n(n)
29060 .k(k)
29061 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080029062 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029063 }
29064 }
29065 }
29066
29067 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE2, n_gt_4_subtile) {
29068 TEST_REQUIRES_X86_SSE2;
29069 for (uint32_t n = 5; n < 8; n++) {
29070 for (size_t k = 1; k <= 40; k += 9) {
29071 for (uint32_t m = 1; m <= 3; m++) {
29072 GemmMicrokernelTester()
29073 .extended_weights(true)
29074 .mr(3)
29075 .nr(4)
29076 .kr(2)
29077 .sr(1)
29078 .m(m)
29079 .n(n)
29080 .k(k)
29081 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029082 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029083 }
29084 }
29085 }
29086 }
29087
29088 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE2, n_div_4) {
29089 TEST_REQUIRES_X86_SSE2;
29090 for (uint32_t n = 8; n <= 12; n += 4) {
29091 for (size_t k = 1; k <= 40; k += 9) {
29092 GemmMicrokernelTester()
29093 .extended_weights(true)
29094 .mr(3)
29095 .nr(4)
29096 .kr(2)
29097 .sr(1)
29098 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029099 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070029100 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029101 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029102 }
29103 }
29104 }
29105
29106 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE2, n_div_4_strided_cn) {
29107 TEST_REQUIRES_X86_SSE2;
29108 for (uint32_t n = 8; n <= 12; n += 4) {
29109 for (size_t k = 1; k <= 40; k += 9) {
29110 GemmMicrokernelTester()
29111 .extended_weights(true)
29112 .mr(3)
29113 .nr(4)
29114 .kr(2)
29115 .sr(1)
29116 .m(3)
29117 .n(n)
29118 .k(k)
29119 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029120 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029121 }
29122 }
29123 }
29124
29125 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE2, n_div_4_strided_a) {
29126 TEST_REQUIRES_X86_SSE2;
29127 for (uint32_t n = 8; n <= 12; n += 4) {
29128 for (size_t k = 1; k <= 40; k += 9) {
29129 GemmMicrokernelTester()
29130 .extended_weights(true)
29131 .mr(3)
29132 .nr(4)
29133 .kr(2)
29134 .sr(1)
29135 .m(3)
29136 .n(n)
29137 .k(k)
29138 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080029139 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029140 }
29141 }
29142 }
29143
29144 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE2, n_div_4_subtile) {
29145 TEST_REQUIRES_X86_SSE2;
29146 for (uint32_t n = 8; n <= 12; n += 4) {
29147 for (size_t k = 1; k <= 40; k += 9) {
29148 for (uint32_t m = 1; m <= 3; m++) {
29149 GemmMicrokernelTester()
29150 .extended_weights(true)
29151 .mr(3)
29152 .nr(4)
29153 .kr(2)
29154 .sr(1)
29155 .m(m)
29156 .n(n)
29157 .k(k)
29158 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029159 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029160 }
29161 }
29162 }
29163 }
29164
29165 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE2, strided_cm_subtile) {
29166 TEST_REQUIRES_X86_SSE2;
29167 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029168 for (uint32_t n = 1; n <= 4; n++) {
29169 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070029170 GemmMicrokernelTester()
29171 .extended_weights(true)
29172 .mr(3)
29173 .nr(4)
29174 .kr(2)
29175 .sr(1)
29176 .m(m)
29177 .n(n)
29178 .k(k)
29179 .cm_stride(7)
29180 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029181 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029182 }
29183 }
29184 }
29185 }
29186
29187 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE2, strided_cm) {
29188 TEST_REQUIRES_X86_SSE2;
29189 GemmMicrokernelTester()
29190 .extended_weights(true)
29191 .mr(3)
29192 .nr(4)
29193 .kr(2)
29194 .sr(1)
29195 .m(3)
29196 .n(4)
29197 .k(8)
29198 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029199 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029200 }
29201#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
29202
29203
29204#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan0ff79892021-08-06 16:05:06 -070029205 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE41, k_eq_8) {
29206 TEST_REQUIRES_X86_SSE41;
29207 GemmMicrokernelTester()
29208 .extended_weights(true)
29209 .mr(3)
29210 .nr(4)
29211 .kr(2)
29212 .sr(1)
29213 .m(3)
29214 .n(4)
29215 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080029216 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029217 }
29218
29219 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE41, strided_cn) {
29220 TEST_REQUIRES_X86_SSE41;
29221 GemmMicrokernelTester()
29222 .extended_weights(true)
29223 .mr(3)
29224 .nr(4)
29225 .kr(2)
29226 .sr(1)
29227 .m(3)
29228 .n(4)
29229 .k(8)
29230 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029231 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029232 }
29233
29234 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE41, k_eq_8_strided_a) {
29235 TEST_REQUIRES_X86_SSE41;
29236 GemmMicrokernelTester()
29237 .extended_weights(true)
29238 .mr(3)
29239 .nr(4)
29240 .kr(2)
29241 .sr(1)
29242 .m(3)
29243 .n(4)
29244 .k(8)
29245 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080029246 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029247 }
29248
29249 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE41, k_eq_8_subtile) {
29250 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080029251 for (uint32_t n = 1; n <= 4; n++) {
29252 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070029253 GemmMicrokernelTester()
29254 .extended_weights(true)
29255 .mr(3)
29256 .nr(4)
29257 .kr(2)
29258 .sr(1)
29259 .m(m)
29260 .n(n)
29261 .k(8)
29262 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029263 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029264 }
29265 }
29266 }
29267
29268 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE41, k_eq_8_subtile_m) {
29269 TEST_REQUIRES_X86_SSE41;
29270 for (uint32_t m = 1; m <= 3; m++) {
29271 GemmMicrokernelTester()
29272 .extended_weights(true)
29273 .mr(3)
29274 .nr(4)
29275 .kr(2)
29276 .sr(1)
29277 .m(m)
29278 .n(4)
29279 .k(8)
29280 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029281 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029282 }
29283 }
29284
29285 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE41, k_eq_8_subtile_n) {
29286 TEST_REQUIRES_X86_SSE41;
29287 for (uint32_t n = 1; n <= 4; n++) {
29288 GemmMicrokernelTester()
29289 .extended_weights(true)
29290 .mr(3)
29291 .nr(4)
29292 .kr(2)
29293 .sr(1)
29294 .m(3)
29295 .n(n)
29296 .k(8)
29297 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029298 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029299 }
29300 }
29301
29302 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE41, k_lt_8) {
29303 TEST_REQUIRES_X86_SSE41;
29304 for (size_t k = 1; k < 8; k++) {
29305 GemmMicrokernelTester()
29306 .extended_weights(true)
29307 .mr(3)
29308 .nr(4)
29309 .kr(2)
29310 .sr(1)
29311 .m(3)
29312 .n(4)
29313 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029314 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029315 }
29316 }
29317
29318 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE41, k_lt_8_strided_a) {
29319 TEST_REQUIRES_X86_SSE41;
29320 for (size_t k = 1; k < 8; k++) {
29321 GemmMicrokernelTester()
29322 .extended_weights(true)
29323 .mr(3)
29324 .nr(4)
29325 .kr(2)
29326 .sr(1)
29327 .m(3)
29328 .n(4)
29329 .k(k)
29330 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080029331 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029332 }
29333 }
29334
29335 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE41, k_lt_8_subtile) {
29336 TEST_REQUIRES_X86_SSE41;
29337 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029338 for (uint32_t n = 1; n <= 4; n++) {
29339 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070029340 GemmMicrokernelTester()
29341 .extended_weights(true)
29342 .mr(3)
29343 .nr(4)
29344 .kr(2)
29345 .sr(1)
29346 .m(m)
29347 .n(n)
29348 .k(k)
29349 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029350 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029351 }
29352 }
29353 }
29354 }
29355
29356 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE41, k_gt_8) {
29357 TEST_REQUIRES_X86_SSE41;
29358 for (size_t k = 9; k < 16; k++) {
29359 GemmMicrokernelTester()
29360 .extended_weights(true)
29361 .mr(3)
29362 .nr(4)
29363 .kr(2)
29364 .sr(1)
29365 .m(3)
29366 .n(4)
29367 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029368 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029369 }
29370 }
29371
29372 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE41, k_gt_8_strided_a) {
29373 TEST_REQUIRES_X86_SSE41;
29374 for (size_t k = 9; k < 16; k++) {
29375 GemmMicrokernelTester()
29376 .extended_weights(true)
29377 .mr(3)
29378 .nr(4)
29379 .kr(2)
29380 .sr(1)
29381 .m(3)
29382 .n(4)
29383 .k(k)
29384 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080029385 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029386 }
29387 }
29388
29389 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE41, k_gt_8_subtile) {
29390 TEST_REQUIRES_X86_SSE41;
29391 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029392 for (uint32_t n = 1; n <= 4; n++) {
29393 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070029394 GemmMicrokernelTester()
29395 .extended_weights(true)
29396 .mr(3)
29397 .nr(4)
29398 .kr(2)
29399 .sr(1)
29400 .m(m)
29401 .n(n)
29402 .k(k)
29403 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029404 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029405 }
29406 }
29407 }
29408 }
29409
29410 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE41, k_div_8) {
29411 TEST_REQUIRES_X86_SSE41;
29412 for (size_t k = 16; k <= 80; k += 8) {
29413 GemmMicrokernelTester()
29414 .extended_weights(true)
29415 .mr(3)
29416 .nr(4)
29417 .kr(2)
29418 .sr(1)
29419 .m(3)
29420 .n(4)
29421 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029422 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029423 }
29424 }
29425
29426 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE41, k_div_8_strided_a) {
29427 TEST_REQUIRES_X86_SSE41;
29428 for (size_t k = 16; k <= 80; k += 8) {
29429 GemmMicrokernelTester()
29430 .extended_weights(true)
29431 .mr(3)
29432 .nr(4)
29433 .kr(2)
29434 .sr(1)
29435 .m(3)
29436 .n(4)
29437 .k(k)
29438 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080029439 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029440 }
29441 }
29442
29443 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE41, k_div_8_subtile) {
29444 TEST_REQUIRES_X86_SSE41;
29445 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029446 for (uint32_t n = 1; n <= 4; n++) {
29447 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070029448 GemmMicrokernelTester()
29449 .extended_weights(true)
29450 .mr(3)
29451 .nr(4)
29452 .kr(2)
29453 .sr(1)
29454 .m(m)
29455 .n(n)
29456 .k(k)
29457 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029458 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029459 }
29460 }
29461 }
29462 }
29463
29464 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE41, n_gt_4) {
29465 TEST_REQUIRES_X86_SSE41;
29466 for (uint32_t n = 5; n < 8; n++) {
29467 for (size_t k = 1; k <= 40; k += 9) {
29468 GemmMicrokernelTester()
29469 .extended_weights(true)
29470 .mr(3)
29471 .nr(4)
29472 .kr(2)
29473 .sr(1)
29474 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029475 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070029476 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029477 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029478 }
29479 }
29480 }
29481
29482 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE41, n_gt_4_strided_cn) {
29483 TEST_REQUIRES_X86_SSE41;
29484 for (uint32_t n = 5; n < 8; n++) {
29485 for (size_t k = 1; k <= 40; k += 9) {
29486 GemmMicrokernelTester()
29487 .extended_weights(true)
29488 .mr(3)
29489 .nr(4)
29490 .kr(2)
29491 .sr(1)
29492 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029493 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070029494 .k(k)
29495 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029496 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029497 }
29498 }
29499 }
29500
29501 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE41, n_gt_4_strided_a) {
29502 TEST_REQUIRES_X86_SSE41;
29503 for (uint32_t n = 5; n < 8; n++) {
29504 for (size_t k = 1; k <= 40; k += 9) {
29505 GemmMicrokernelTester()
29506 .extended_weights(true)
29507 .mr(3)
29508 .nr(4)
29509 .kr(2)
29510 .sr(1)
29511 .m(3)
29512 .n(n)
29513 .k(k)
29514 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080029515 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029516 }
29517 }
29518 }
29519
29520 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE41, n_gt_4_subtile) {
29521 TEST_REQUIRES_X86_SSE41;
29522 for (uint32_t n = 5; n < 8; n++) {
29523 for (size_t k = 1; k <= 40; k += 9) {
29524 for (uint32_t m = 1; m <= 3; m++) {
29525 GemmMicrokernelTester()
29526 .extended_weights(true)
29527 .mr(3)
29528 .nr(4)
29529 .kr(2)
29530 .sr(1)
29531 .m(m)
29532 .n(n)
29533 .k(k)
29534 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029535 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029536 }
29537 }
29538 }
29539 }
29540
29541 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE41, n_div_4) {
29542 TEST_REQUIRES_X86_SSE41;
29543 for (uint32_t n = 8; n <= 12; n += 4) {
29544 for (size_t k = 1; k <= 40; k += 9) {
29545 GemmMicrokernelTester()
29546 .extended_weights(true)
29547 .mr(3)
29548 .nr(4)
29549 .kr(2)
29550 .sr(1)
29551 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029552 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070029553 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029554 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029555 }
29556 }
29557 }
29558
29559 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE41, n_div_4_strided_cn) {
29560 TEST_REQUIRES_X86_SSE41;
29561 for (uint32_t n = 8; n <= 12; n += 4) {
29562 for (size_t k = 1; k <= 40; k += 9) {
29563 GemmMicrokernelTester()
29564 .extended_weights(true)
29565 .mr(3)
29566 .nr(4)
29567 .kr(2)
29568 .sr(1)
29569 .m(3)
29570 .n(n)
29571 .k(k)
29572 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029573 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029574 }
29575 }
29576 }
29577
29578 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE41, n_div_4_strided_a) {
29579 TEST_REQUIRES_X86_SSE41;
29580 for (uint32_t n = 8; n <= 12; n += 4) {
29581 for (size_t k = 1; k <= 40; k += 9) {
29582 GemmMicrokernelTester()
29583 .extended_weights(true)
29584 .mr(3)
29585 .nr(4)
29586 .kr(2)
29587 .sr(1)
29588 .m(3)
29589 .n(n)
29590 .k(k)
29591 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080029592 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029593 }
29594 }
29595 }
29596
29597 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE41, n_div_4_subtile) {
29598 TEST_REQUIRES_X86_SSE41;
29599 for (uint32_t n = 8; n <= 12; n += 4) {
29600 for (size_t k = 1; k <= 40; k += 9) {
29601 for (uint32_t m = 1; m <= 3; m++) {
29602 GemmMicrokernelTester()
29603 .extended_weights(true)
29604 .mr(3)
29605 .nr(4)
29606 .kr(2)
29607 .sr(1)
29608 .m(m)
29609 .n(n)
29610 .k(k)
29611 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029612 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029613 }
29614 }
29615 }
29616 }
29617
29618 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE41, strided_cm_subtile) {
29619 TEST_REQUIRES_X86_SSE41;
29620 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029621 for (uint32_t n = 1; n <= 4; n++) {
29622 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070029623 GemmMicrokernelTester()
29624 .extended_weights(true)
29625 .mr(3)
29626 .nr(4)
29627 .kr(2)
29628 .sr(1)
29629 .m(m)
29630 .n(n)
29631 .k(k)
29632 .cm_stride(7)
29633 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029634 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029635 }
29636 }
29637 }
29638 }
29639
29640 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__SSE41, strided_cm) {
29641 TEST_REQUIRES_X86_SSE41;
29642 GemmMicrokernelTester()
29643 .extended_weights(true)
29644 .mr(3)
29645 .nr(4)
29646 .kr(2)
29647 .sr(1)
29648 .m(3)
29649 .n(4)
29650 .k(8)
29651 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029652 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029653 }
29654#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
29655
29656
29657#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan0ff79892021-08-06 16:05:06 -070029658 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__AVX, k_eq_8) {
29659 TEST_REQUIRES_X86_AVX;
29660 GemmMicrokernelTester()
29661 .extended_weights(true)
29662 .mr(2)
29663 .nr(4)
29664 .kr(2)
29665 .sr(1)
29666 .m(2)
29667 .n(4)
29668 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080029669 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029670 }
29671
29672 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__AVX, strided_cn) {
29673 TEST_REQUIRES_X86_AVX;
29674 GemmMicrokernelTester()
29675 .extended_weights(true)
29676 .mr(2)
29677 .nr(4)
29678 .kr(2)
29679 .sr(1)
29680 .m(2)
29681 .n(4)
29682 .k(8)
29683 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029684 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029685 }
29686
29687 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__AVX, k_eq_8_strided_a) {
29688 TEST_REQUIRES_X86_AVX;
29689 GemmMicrokernelTester()
29690 .extended_weights(true)
29691 .mr(2)
29692 .nr(4)
29693 .kr(2)
29694 .sr(1)
29695 .m(2)
29696 .n(4)
29697 .k(8)
29698 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080029699 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029700 }
29701
29702 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__AVX, k_eq_8_subtile) {
29703 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080029704 for (uint32_t n = 1; n <= 4; n++) {
29705 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070029706 GemmMicrokernelTester()
29707 .extended_weights(true)
29708 .mr(2)
29709 .nr(4)
29710 .kr(2)
29711 .sr(1)
29712 .m(m)
29713 .n(n)
29714 .k(8)
29715 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029716 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029717 }
29718 }
29719 }
29720
29721 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__AVX, k_eq_8_subtile_m) {
29722 TEST_REQUIRES_X86_AVX;
29723 for (uint32_t m = 1; m <= 2; m++) {
29724 GemmMicrokernelTester()
29725 .extended_weights(true)
29726 .mr(2)
29727 .nr(4)
29728 .kr(2)
29729 .sr(1)
29730 .m(m)
29731 .n(4)
29732 .k(8)
29733 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029734 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029735 }
29736 }
29737
29738 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__AVX, k_eq_8_subtile_n) {
29739 TEST_REQUIRES_X86_AVX;
29740 for (uint32_t n = 1; n <= 4; n++) {
29741 GemmMicrokernelTester()
29742 .extended_weights(true)
29743 .mr(2)
29744 .nr(4)
29745 .kr(2)
29746 .sr(1)
29747 .m(2)
29748 .n(n)
29749 .k(8)
29750 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029751 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029752 }
29753 }
29754
29755 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__AVX, k_lt_8) {
29756 TEST_REQUIRES_X86_AVX;
29757 for (size_t k = 1; k < 8; k++) {
29758 GemmMicrokernelTester()
29759 .extended_weights(true)
29760 .mr(2)
29761 .nr(4)
29762 .kr(2)
29763 .sr(1)
29764 .m(2)
29765 .n(4)
29766 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029767 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029768 }
29769 }
29770
29771 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__AVX, k_lt_8_strided_a) {
29772 TEST_REQUIRES_X86_AVX;
29773 for (size_t k = 1; k < 8; k++) {
29774 GemmMicrokernelTester()
29775 .extended_weights(true)
29776 .mr(2)
29777 .nr(4)
29778 .kr(2)
29779 .sr(1)
29780 .m(2)
29781 .n(4)
29782 .k(k)
29783 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080029784 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029785 }
29786 }
29787
29788 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__AVX, k_lt_8_subtile) {
29789 TEST_REQUIRES_X86_AVX;
29790 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029791 for (uint32_t n = 1; n <= 4; n++) {
29792 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070029793 GemmMicrokernelTester()
29794 .extended_weights(true)
29795 .mr(2)
29796 .nr(4)
29797 .kr(2)
29798 .sr(1)
29799 .m(m)
29800 .n(n)
29801 .k(k)
29802 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029803 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029804 }
29805 }
29806 }
29807 }
29808
29809 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__AVX, k_gt_8) {
29810 TEST_REQUIRES_X86_AVX;
29811 for (size_t k = 9; k < 16; k++) {
29812 GemmMicrokernelTester()
29813 .extended_weights(true)
29814 .mr(2)
29815 .nr(4)
29816 .kr(2)
29817 .sr(1)
29818 .m(2)
29819 .n(4)
29820 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029821 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029822 }
29823 }
29824
29825 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__AVX, k_gt_8_strided_a) {
29826 TEST_REQUIRES_X86_AVX;
29827 for (size_t k = 9; k < 16; k++) {
29828 GemmMicrokernelTester()
29829 .extended_weights(true)
29830 .mr(2)
29831 .nr(4)
29832 .kr(2)
29833 .sr(1)
29834 .m(2)
29835 .n(4)
29836 .k(k)
29837 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080029838 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029839 }
29840 }
29841
29842 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__AVX, k_gt_8_subtile) {
29843 TEST_REQUIRES_X86_AVX;
29844 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029845 for (uint32_t n = 1; n <= 4; n++) {
29846 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070029847 GemmMicrokernelTester()
29848 .extended_weights(true)
29849 .mr(2)
29850 .nr(4)
29851 .kr(2)
29852 .sr(1)
29853 .m(m)
29854 .n(n)
29855 .k(k)
29856 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029857 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029858 }
29859 }
29860 }
29861 }
29862
29863 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__AVX, k_div_8) {
29864 TEST_REQUIRES_X86_AVX;
29865 for (size_t k = 16; k <= 80; k += 8) {
29866 GemmMicrokernelTester()
29867 .extended_weights(true)
29868 .mr(2)
29869 .nr(4)
29870 .kr(2)
29871 .sr(1)
29872 .m(2)
29873 .n(4)
29874 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029875 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029876 }
29877 }
29878
29879 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__AVX, k_div_8_strided_a) {
29880 TEST_REQUIRES_X86_AVX;
29881 for (size_t k = 16; k <= 80; k += 8) {
29882 GemmMicrokernelTester()
29883 .extended_weights(true)
29884 .mr(2)
29885 .nr(4)
29886 .kr(2)
29887 .sr(1)
29888 .m(2)
29889 .n(4)
29890 .k(k)
29891 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080029892 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029893 }
29894 }
29895
29896 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__AVX, k_div_8_subtile) {
29897 TEST_REQUIRES_X86_AVX;
29898 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029899 for (uint32_t n = 1; n <= 4; n++) {
29900 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070029901 GemmMicrokernelTester()
29902 .extended_weights(true)
29903 .mr(2)
29904 .nr(4)
29905 .kr(2)
29906 .sr(1)
29907 .m(m)
29908 .n(n)
29909 .k(k)
29910 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029911 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029912 }
29913 }
29914 }
29915 }
29916
29917 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__AVX, n_gt_4) {
29918 TEST_REQUIRES_X86_AVX;
29919 for (uint32_t n = 5; n < 8; n++) {
29920 for (size_t k = 1; k <= 40; k += 9) {
29921 GemmMicrokernelTester()
29922 .extended_weights(true)
29923 .mr(2)
29924 .nr(4)
29925 .kr(2)
29926 .sr(1)
29927 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029928 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070029929 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029930 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029931 }
29932 }
29933 }
29934
29935 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__AVX, n_gt_4_strided_cn) {
29936 TEST_REQUIRES_X86_AVX;
29937 for (uint32_t n = 5; n < 8; n++) {
29938 for (size_t k = 1; k <= 40; k += 9) {
29939 GemmMicrokernelTester()
29940 .extended_weights(true)
29941 .mr(2)
29942 .nr(4)
29943 .kr(2)
29944 .sr(1)
29945 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029946 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070029947 .k(k)
29948 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029949 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029950 }
29951 }
29952 }
29953
29954 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__AVX, n_gt_4_strided_a) {
29955 TEST_REQUIRES_X86_AVX;
29956 for (uint32_t n = 5; n < 8; n++) {
29957 for (size_t k = 1; k <= 40; k += 9) {
29958 GemmMicrokernelTester()
29959 .extended_weights(true)
29960 .mr(2)
29961 .nr(4)
29962 .kr(2)
29963 .sr(1)
29964 .m(2)
29965 .n(n)
29966 .k(k)
29967 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080029968 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029969 }
29970 }
29971 }
29972
29973 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__AVX, n_gt_4_subtile) {
29974 TEST_REQUIRES_X86_AVX;
29975 for (uint32_t n = 5; n < 8; n++) {
29976 for (size_t k = 1; k <= 40; k += 9) {
29977 for (uint32_t m = 1; m <= 2; m++) {
29978 GemmMicrokernelTester()
29979 .extended_weights(true)
29980 .mr(2)
29981 .nr(4)
29982 .kr(2)
29983 .sr(1)
29984 .m(m)
29985 .n(n)
29986 .k(k)
29987 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029988 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070029989 }
29990 }
29991 }
29992 }
29993
29994 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__AVX, n_div_4) {
29995 TEST_REQUIRES_X86_AVX;
29996 for (uint32_t n = 8; n <= 12; n += 4) {
29997 for (size_t k = 1; k <= 40; k += 9) {
29998 GemmMicrokernelTester()
29999 .extended_weights(true)
30000 .mr(2)
30001 .nr(4)
30002 .kr(2)
30003 .sr(1)
30004 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030005 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070030006 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030007 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030008 }
30009 }
30010 }
30011
30012 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__AVX, n_div_4_strided_cn) {
30013 TEST_REQUIRES_X86_AVX;
30014 for (uint32_t n = 8; n <= 12; n += 4) {
30015 for (size_t k = 1; k <= 40; k += 9) {
30016 GemmMicrokernelTester()
30017 .extended_weights(true)
30018 .mr(2)
30019 .nr(4)
30020 .kr(2)
30021 .sr(1)
30022 .m(2)
30023 .n(n)
30024 .k(k)
30025 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080030026 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030027 }
30028 }
30029 }
30030
30031 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__AVX, n_div_4_strided_a) {
30032 TEST_REQUIRES_X86_AVX;
30033 for (uint32_t n = 8; n <= 12; n += 4) {
30034 for (size_t k = 1; k <= 40; k += 9) {
30035 GemmMicrokernelTester()
30036 .extended_weights(true)
30037 .mr(2)
30038 .nr(4)
30039 .kr(2)
30040 .sr(1)
30041 .m(2)
30042 .n(n)
30043 .k(k)
30044 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080030045 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030046 }
30047 }
30048 }
30049
30050 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__AVX, n_div_4_subtile) {
30051 TEST_REQUIRES_X86_AVX;
30052 for (uint32_t n = 8; n <= 12; n += 4) {
30053 for (size_t k = 1; k <= 40; k += 9) {
30054 for (uint32_t m = 1; m <= 2; m++) {
30055 GemmMicrokernelTester()
30056 .extended_weights(true)
30057 .mr(2)
30058 .nr(4)
30059 .kr(2)
30060 .sr(1)
30061 .m(m)
30062 .n(n)
30063 .k(k)
30064 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030065 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030066 }
30067 }
30068 }
30069 }
30070
30071 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__AVX, strided_cm_subtile) {
30072 TEST_REQUIRES_X86_AVX;
30073 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030074 for (uint32_t n = 1; n <= 4; n++) {
30075 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070030076 GemmMicrokernelTester()
30077 .extended_weights(true)
30078 .mr(2)
30079 .nr(4)
30080 .kr(2)
30081 .sr(1)
30082 .m(m)
30083 .n(n)
30084 .k(k)
30085 .cm_stride(7)
30086 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030087 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030088 }
30089 }
30090 }
30091 }
30092
30093 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C2__AVX, strided_cm) {
30094 TEST_REQUIRES_X86_AVX;
30095 GemmMicrokernelTester()
30096 .extended_weights(true)
30097 .mr(2)
30098 .nr(4)
30099 .kr(2)
30100 .sr(1)
30101 .m(2)
30102 .n(4)
30103 .k(8)
30104 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080030105 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030106 }
30107#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
30108
30109
30110#if XNN_ARCH_X86 || XNN_ARCH_X86_64
30111 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__AVX, k_eq_8) {
30112 TEST_REQUIRES_X86_AVX;
30113 GemmMicrokernelTester()
30114 .extended_weights(true)
30115 .mr(3)
30116 .nr(4)
30117 .kr(2)
30118 .sr(1)
30119 .m(3)
30120 .n(4)
30121 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080030122 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030123 }
30124
30125 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__AVX, strided_cn) {
30126 TEST_REQUIRES_X86_AVX;
30127 GemmMicrokernelTester()
30128 .extended_weights(true)
30129 .mr(3)
30130 .nr(4)
30131 .kr(2)
30132 .sr(1)
30133 .m(3)
30134 .n(4)
30135 .k(8)
30136 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080030137 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030138 }
30139
30140 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__AVX, k_eq_8_strided_a) {
30141 TEST_REQUIRES_X86_AVX;
30142 GemmMicrokernelTester()
30143 .extended_weights(true)
30144 .mr(3)
30145 .nr(4)
30146 .kr(2)
30147 .sr(1)
30148 .m(3)
30149 .n(4)
30150 .k(8)
30151 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080030152 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030153 }
30154
30155 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__AVX, k_eq_8_subtile) {
30156 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080030157 for (uint32_t n = 1; n <= 4; n++) {
30158 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070030159 GemmMicrokernelTester()
30160 .extended_weights(true)
30161 .mr(3)
30162 .nr(4)
30163 .kr(2)
30164 .sr(1)
30165 .m(m)
30166 .n(n)
30167 .k(8)
30168 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030169 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030170 }
30171 }
30172 }
30173
30174 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__AVX, k_eq_8_subtile_m) {
30175 TEST_REQUIRES_X86_AVX;
30176 for (uint32_t m = 1; m <= 3; m++) {
30177 GemmMicrokernelTester()
30178 .extended_weights(true)
30179 .mr(3)
30180 .nr(4)
30181 .kr(2)
30182 .sr(1)
30183 .m(m)
30184 .n(4)
30185 .k(8)
30186 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030187 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030188 }
30189 }
30190
30191 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__AVX, k_eq_8_subtile_n) {
30192 TEST_REQUIRES_X86_AVX;
30193 for (uint32_t n = 1; n <= 4; n++) {
30194 GemmMicrokernelTester()
30195 .extended_weights(true)
30196 .mr(3)
30197 .nr(4)
30198 .kr(2)
30199 .sr(1)
30200 .m(3)
30201 .n(n)
30202 .k(8)
30203 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030204 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030205 }
30206 }
30207
30208 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__AVX, k_lt_8) {
30209 TEST_REQUIRES_X86_AVX;
30210 for (size_t k = 1; k < 8; k++) {
30211 GemmMicrokernelTester()
30212 .extended_weights(true)
30213 .mr(3)
30214 .nr(4)
30215 .kr(2)
30216 .sr(1)
30217 .m(3)
30218 .n(4)
30219 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030220 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030221 }
30222 }
30223
30224 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__AVX, k_lt_8_strided_a) {
30225 TEST_REQUIRES_X86_AVX;
30226 for (size_t k = 1; k < 8; k++) {
30227 GemmMicrokernelTester()
30228 .extended_weights(true)
30229 .mr(3)
30230 .nr(4)
30231 .kr(2)
30232 .sr(1)
30233 .m(3)
30234 .n(4)
30235 .k(k)
30236 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080030237 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030238 }
30239 }
30240
30241 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__AVX, k_lt_8_subtile) {
30242 TEST_REQUIRES_X86_AVX;
30243 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030244 for (uint32_t n = 1; n <= 4; n++) {
30245 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070030246 GemmMicrokernelTester()
30247 .extended_weights(true)
30248 .mr(3)
30249 .nr(4)
30250 .kr(2)
30251 .sr(1)
30252 .m(m)
30253 .n(n)
30254 .k(k)
30255 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030256 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030257 }
30258 }
30259 }
30260 }
30261
30262 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__AVX, k_gt_8) {
30263 TEST_REQUIRES_X86_AVX;
30264 for (size_t k = 9; k < 16; k++) {
30265 GemmMicrokernelTester()
30266 .extended_weights(true)
30267 .mr(3)
30268 .nr(4)
30269 .kr(2)
30270 .sr(1)
30271 .m(3)
30272 .n(4)
30273 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030274 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030275 }
30276 }
30277
30278 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__AVX, k_gt_8_strided_a) {
30279 TEST_REQUIRES_X86_AVX;
30280 for (size_t k = 9; k < 16; k++) {
30281 GemmMicrokernelTester()
30282 .extended_weights(true)
30283 .mr(3)
30284 .nr(4)
30285 .kr(2)
30286 .sr(1)
30287 .m(3)
30288 .n(4)
30289 .k(k)
30290 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080030291 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030292 }
30293 }
30294
30295 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__AVX, k_gt_8_subtile) {
30296 TEST_REQUIRES_X86_AVX;
30297 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030298 for (uint32_t n = 1; n <= 4; n++) {
30299 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070030300 GemmMicrokernelTester()
30301 .extended_weights(true)
30302 .mr(3)
30303 .nr(4)
30304 .kr(2)
30305 .sr(1)
30306 .m(m)
30307 .n(n)
30308 .k(k)
30309 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030310 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030311 }
30312 }
30313 }
30314 }
30315
30316 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__AVX, k_div_8) {
30317 TEST_REQUIRES_X86_AVX;
30318 for (size_t k = 16; k <= 80; k += 8) {
30319 GemmMicrokernelTester()
30320 .extended_weights(true)
30321 .mr(3)
30322 .nr(4)
30323 .kr(2)
30324 .sr(1)
30325 .m(3)
30326 .n(4)
30327 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030328 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030329 }
30330 }
30331
30332 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__AVX, k_div_8_strided_a) {
30333 TEST_REQUIRES_X86_AVX;
30334 for (size_t k = 16; k <= 80; k += 8) {
30335 GemmMicrokernelTester()
30336 .extended_weights(true)
30337 .mr(3)
30338 .nr(4)
30339 .kr(2)
30340 .sr(1)
30341 .m(3)
30342 .n(4)
30343 .k(k)
30344 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080030345 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030346 }
30347 }
30348
30349 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__AVX, k_div_8_subtile) {
30350 TEST_REQUIRES_X86_AVX;
30351 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030352 for (uint32_t n = 1; n <= 4; n++) {
30353 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070030354 GemmMicrokernelTester()
30355 .extended_weights(true)
30356 .mr(3)
30357 .nr(4)
30358 .kr(2)
30359 .sr(1)
30360 .m(m)
30361 .n(n)
30362 .k(k)
30363 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030364 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030365 }
30366 }
30367 }
30368 }
30369
30370 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__AVX, n_gt_4) {
30371 TEST_REQUIRES_X86_AVX;
30372 for (uint32_t n = 5; n < 8; n++) {
30373 for (size_t k = 1; k <= 40; k += 9) {
30374 GemmMicrokernelTester()
30375 .extended_weights(true)
30376 .mr(3)
30377 .nr(4)
30378 .kr(2)
30379 .sr(1)
30380 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030381 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070030382 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030383 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030384 }
30385 }
30386 }
30387
30388 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__AVX, n_gt_4_strided_cn) {
30389 TEST_REQUIRES_X86_AVX;
30390 for (uint32_t n = 5; n < 8; n++) {
30391 for (size_t k = 1; k <= 40; k += 9) {
30392 GemmMicrokernelTester()
30393 .extended_weights(true)
30394 .mr(3)
30395 .nr(4)
30396 .kr(2)
30397 .sr(1)
30398 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030399 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070030400 .k(k)
30401 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080030402 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030403 }
30404 }
30405 }
30406
30407 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__AVX, n_gt_4_strided_a) {
30408 TEST_REQUIRES_X86_AVX;
30409 for (uint32_t n = 5; n < 8; n++) {
30410 for (size_t k = 1; k <= 40; k += 9) {
30411 GemmMicrokernelTester()
30412 .extended_weights(true)
30413 .mr(3)
30414 .nr(4)
30415 .kr(2)
30416 .sr(1)
30417 .m(3)
30418 .n(n)
30419 .k(k)
30420 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080030421 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030422 }
30423 }
30424 }
30425
30426 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__AVX, n_gt_4_subtile) {
30427 TEST_REQUIRES_X86_AVX;
30428 for (uint32_t n = 5; n < 8; n++) {
30429 for (size_t k = 1; k <= 40; k += 9) {
30430 for (uint32_t m = 1; m <= 3; m++) {
30431 GemmMicrokernelTester()
30432 .extended_weights(true)
30433 .mr(3)
30434 .nr(4)
30435 .kr(2)
30436 .sr(1)
30437 .m(m)
30438 .n(n)
30439 .k(k)
30440 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030441 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030442 }
30443 }
30444 }
30445 }
30446
30447 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__AVX, n_div_4) {
30448 TEST_REQUIRES_X86_AVX;
30449 for (uint32_t n = 8; n <= 12; n += 4) {
30450 for (size_t k = 1; k <= 40; k += 9) {
30451 GemmMicrokernelTester()
30452 .extended_weights(true)
30453 .mr(3)
30454 .nr(4)
30455 .kr(2)
30456 .sr(1)
30457 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030458 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070030459 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030460 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030461 }
30462 }
30463 }
30464
30465 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__AVX, n_div_4_strided_cn) {
30466 TEST_REQUIRES_X86_AVX;
30467 for (uint32_t n = 8; n <= 12; n += 4) {
30468 for (size_t k = 1; k <= 40; k += 9) {
30469 GemmMicrokernelTester()
30470 .extended_weights(true)
30471 .mr(3)
30472 .nr(4)
30473 .kr(2)
30474 .sr(1)
30475 .m(3)
30476 .n(n)
30477 .k(k)
30478 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080030479 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030480 }
30481 }
30482 }
30483
30484 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__AVX, n_div_4_strided_a) {
30485 TEST_REQUIRES_X86_AVX;
30486 for (uint32_t n = 8; n <= 12; n += 4) {
30487 for (size_t k = 1; k <= 40; k += 9) {
30488 GemmMicrokernelTester()
30489 .extended_weights(true)
30490 .mr(3)
30491 .nr(4)
30492 .kr(2)
30493 .sr(1)
30494 .m(3)
30495 .n(n)
30496 .k(k)
30497 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080030498 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030499 }
30500 }
30501 }
30502
30503 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__AVX, n_div_4_subtile) {
30504 TEST_REQUIRES_X86_AVX;
30505 for (uint32_t n = 8; n <= 12; n += 4) {
30506 for (size_t k = 1; k <= 40; k += 9) {
30507 for (uint32_t m = 1; m <= 3; m++) {
30508 GemmMicrokernelTester()
30509 .extended_weights(true)
30510 .mr(3)
30511 .nr(4)
30512 .kr(2)
30513 .sr(1)
30514 .m(m)
30515 .n(n)
30516 .k(k)
30517 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030518 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030519 }
30520 }
30521 }
30522 }
30523
30524 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__AVX, strided_cm_subtile) {
30525 TEST_REQUIRES_X86_AVX;
30526 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030527 for (uint32_t n = 1; n <= 4; n++) {
30528 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070030529 GemmMicrokernelTester()
30530 .extended_weights(true)
30531 .mr(3)
30532 .nr(4)
30533 .kr(2)
30534 .sr(1)
30535 .m(m)
30536 .n(n)
30537 .k(k)
30538 .cm_stride(7)
30539 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030540 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030541 }
30542 }
30543 }
30544 }
30545
30546 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C2__AVX, strided_cm) {
30547 TEST_REQUIRES_X86_AVX;
30548 GemmMicrokernelTester()
30549 .extended_weights(true)
30550 .mr(3)
30551 .nr(4)
30552 .kr(2)
30553 .sr(1)
30554 .m(3)
30555 .n(4)
30556 .k(8)
30557 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080030558 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030559 }
30560#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
30561
30562
30563#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan0ff79892021-08-06 16:05:06 -070030564 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__XOP, k_eq_8) {
30565 TEST_REQUIRES_X86_XOP;
30566 GemmMicrokernelTester()
30567 .extended_weights(true)
30568 .mr(1)
30569 .nr(4)
30570 .kr(2)
30571 .sr(1)
30572 .m(1)
30573 .n(4)
30574 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080030575 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030576 }
30577
30578 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__XOP, strided_cn) {
30579 TEST_REQUIRES_X86_XOP;
30580 GemmMicrokernelTester()
30581 .extended_weights(true)
30582 .mr(1)
30583 .nr(4)
30584 .kr(2)
30585 .sr(1)
30586 .m(1)
30587 .n(4)
30588 .k(8)
30589 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080030590 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030591 }
30592
30593 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__XOP, k_eq_8_strided_a) {
30594 TEST_REQUIRES_X86_XOP;
30595 GemmMicrokernelTester()
30596 .extended_weights(true)
30597 .mr(1)
30598 .nr(4)
30599 .kr(2)
30600 .sr(1)
30601 .m(1)
30602 .n(4)
30603 .k(8)
30604 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080030605 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030606 }
30607
30608 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__XOP, k_eq_8_subtile) {
30609 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080030610 for (uint32_t n = 1; n <= 4; n++) {
30611 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070030612 GemmMicrokernelTester()
30613 .extended_weights(true)
30614 .mr(1)
30615 .nr(4)
30616 .kr(2)
30617 .sr(1)
30618 .m(m)
30619 .n(n)
30620 .k(8)
30621 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030622 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030623 }
30624 }
30625 }
30626
30627 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__XOP, k_eq_8_subtile_m) {
30628 TEST_REQUIRES_X86_XOP;
30629 for (uint32_t m = 1; m <= 1; m++) {
30630 GemmMicrokernelTester()
30631 .extended_weights(true)
30632 .mr(1)
30633 .nr(4)
30634 .kr(2)
30635 .sr(1)
30636 .m(m)
30637 .n(4)
30638 .k(8)
30639 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030640 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030641 }
30642 }
30643
30644 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__XOP, k_eq_8_subtile_n) {
30645 TEST_REQUIRES_X86_XOP;
30646 for (uint32_t n = 1; n <= 4; n++) {
30647 GemmMicrokernelTester()
30648 .extended_weights(true)
30649 .mr(1)
30650 .nr(4)
30651 .kr(2)
30652 .sr(1)
30653 .m(1)
30654 .n(n)
30655 .k(8)
30656 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030657 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030658 }
30659 }
30660
30661 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__XOP, k_lt_8) {
30662 TEST_REQUIRES_X86_XOP;
30663 for (size_t k = 1; k < 8; k++) {
30664 GemmMicrokernelTester()
30665 .extended_weights(true)
30666 .mr(1)
30667 .nr(4)
30668 .kr(2)
30669 .sr(1)
30670 .m(1)
30671 .n(4)
30672 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030673 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030674 }
30675 }
30676
30677 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__XOP, k_lt_8_strided_a) {
30678 TEST_REQUIRES_X86_XOP;
30679 for (size_t k = 1; k < 8; k++) {
30680 GemmMicrokernelTester()
30681 .extended_weights(true)
30682 .mr(1)
30683 .nr(4)
30684 .kr(2)
30685 .sr(1)
30686 .m(1)
30687 .n(4)
30688 .k(k)
30689 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080030690 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030691 }
30692 }
30693
30694 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__XOP, k_lt_8_subtile) {
30695 TEST_REQUIRES_X86_XOP;
30696 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030697 for (uint32_t n = 1; n <= 4; n++) {
30698 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070030699 GemmMicrokernelTester()
30700 .extended_weights(true)
30701 .mr(1)
30702 .nr(4)
30703 .kr(2)
30704 .sr(1)
30705 .m(m)
30706 .n(n)
30707 .k(k)
30708 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030709 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030710 }
30711 }
30712 }
30713 }
30714
30715 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__XOP, k_gt_8) {
30716 TEST_REQUIRES_X86_XOP;
30717 for (size_t k = 9; k < 16; k++) {
30718 GemmMicrokernelTester()
30719 .extended_weights(true)
30720 .mr(1)
30721 .nr(4)
30722 .kr(2)
30723 .sr(1)
30724 .m(1)
30725 .n(4)
30726 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030727 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030728 }
30729 }
30730
30731 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__XOP, k_gt_8_strided_a) {
30732 TEST_REQUIRES_X86_XOP;
30733 for (size_t k = 9; k < 16; k++) {
30734 GemmMicrokernelTester()
30735 .extended_weights(true)
30736 .mr(1)
30737 .nr(4)
30738 .kr(2)
30739 .sr(1)
30740 .m(1)
30741 .n(4)
30742 .k(k)
30743 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080030744 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030745 }
30746 }
30747
30748 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__XOP, k_gt_8_subtile) {
30749 TEST_REQUIRES_X86_XOP;
30750 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030751 for (uint32_t n = 1; n <= 4; n++) {
30752 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070030753 GemmMicrokernelTester()
30754 .extended_weights(true)
30755 .mr(1)
30756 .nr(4)
30757 .kr(2)
30758 .sr(1)
30759 .m(m)
30760 .n(n)
30761 .k(k)
30762 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030763 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030764 }
30765 }
30766 }
30767 }
30768
30769 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__XOP, k_div_8) {
30770 TEST_REQUIRES_X86_XOP;
30771 for (size_t k = 16; k <= 80; k += 8) {
30772 GemmMicrokernelTester()
30773 .extended_weights(true)
30774 .mr(1)
30775 .nr(4)
30776 .kr(2)
30777 .sr(1)
30778 .m(1)
30779 .n(4)
30780 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030781 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030782 }
30783 }
30784
30785 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__XOP, k_div_8_strided_a) {
30786 TEST_REQUIRES_X86_XOP;
30787 for (size_t k = 16; k <= 80; k += 8) {
30788 GemmMicrokernelTester()
30789 .extended_weights(true)
30790 .mr(1)
30791 .nr(4)
30792 .kr(2)
30793 .sr(1)
30794 .m(1)
30795 .n(4)
30796 .k(k)
30797 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080030798 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030799 }
30800 }
30801
30802 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__XOP, k_div_8_subtile) {
30803 TEST_REQUIRES_X86_XOP;
30804 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030805 for (uint32_t n = 1; n <= 4; n++) {
30806 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070030807 GemmMicrokernelTester()
30808 .extended_weights(true)
30809 .mr(1)
30810 .nr(4)
30811 .kr(2)
30812 .sr(1)
30813 .m(m)
30814 .n(n)
30815 .k(k)
30816 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030817 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030818 }
30819 }
30820 }
30821 }
30822
30823 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__XOP, n_gt_4) {
30824 TEST_REQUIRES_X86_XOP;
30825 for (uint32_t n = 5; n < 8; n++) {
30826 for (size_t k = 1; k <= 40; k += 9) {
30827 GemmMicrokernelTester()
30828 .extended_weights(true)
30829 .mr(1)
30830 .nr(4)
30831 .kr(2)
30832 .sr(1)
30833 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030834 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070030835 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030836 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030837 }
30838 }
30839 }
30840
30841 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__XOP, n_gt_4_strided_cn) {
30842 TEST_REQUIRES_X86_XOP;
30843 for (uint32_t n = 5; n < 8; n++) {
30844 for (size_t k = 1; k <= 40; k += 9) {
30845 GemmMicrokernelTester()
30846 .extended_weights(true)
30847 .mr(1)
30848 .nr(4)
30849 .kr(2)
30850 .sr(1)
30851 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030852 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070030853 .k(k)
30854 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080030855 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030856 }
30857 }
30858 }
30859
30860 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__XOP, n_gt_4_strided_a) {
30861 TEST_REQUIRES_X86_XOP;
30862 for (uint32_t n = 5; n < 8; n++) {
30863 for (size_t k = 1; k <= 40; k += 9) {
30864 GemmMicrokernelTester()
30865 .extended_weights(true)
30866 .mr(1)
30867 .nr(4)
30868 .kr(2)
30869 .sr(1)
30870 .m(1)
30871 .n(n)
30872 .k(k)
30873 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080030874 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030875 }
30876 }
30877 }
30878
30879 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__XOP, n_gt_4_subtile) {
30880 TEST_REQUIRES_X86_XOP;
30881 for (uint32_t n = 5; n < 8; n++) {
30882 for (size_t k = 1; k <= 40; k += 9) {
30883 for (uint32_t m = 1; m <= 1; m++) {
30884 GemmMicrokernelTester()
30885 .extended_weights(true)
30886 .mr(1)
30887 .nr(4)
30888 .kr(2)
30889 .sr(1)
30890 .m(m)
30891 .n(n)
30892 .k(k)
30893 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030894 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030895 }
30896 }
30897 }
30898 }
30899
30900 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__XOP, n_div_4) {
30901 TEST_REQUIRES_X86_XOP;
30902 for (uint32_t n = 8; n <= 12; n += 4) {
30903 for (size_t k = 1; k <= 40; k += 9) {
30904 GemmMicrokernelTester()
30905 .extended_weights(true)
30906 .mr(1)
30907 .nr(4)
30908 .kr(2)
30909 .sr(1)
30910 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030911 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070030912 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030913 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030914 }
30915 }
30916 }
30917
30918 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__XOP, n_div_4_strided_cn) {
30919 TEST_REQUIRES_X86_XOP;
30920 for (uint32_t n = 8; n <= 12; n += 4) {
30921 for (size_t k = 1; k <= 40; k += 9) {
30922 GemmMicrokernelTester()
30923 .extended_weights(true)
30924 .mr(1)
30925 .nr(4)
30926 .kr(2)
30927 .sr(1)
30928 .m(1)
30929 .n(n)
30930 .k(k)
30931 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080030932 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030933 }
30934 }
30935 }
30936
30937 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__XOP, n_div_4_strided_a) {
30938 TEST_REQUIRES_X86_XOP;
30939 for (uint32_t n = 8; n <= 12; n += 4) {
30940 for (size_t k = 1; k <= 40; k += 9) {
30941 GemmMicrokernelTester()
30942 .extended_weights(true)
30943 .mr(1)
30944 .nr(4)
30945 .kr(2)
30946 .sr(1)
30947 .m(1)
30948 .n(n)
30949 .k(k)
30950 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080030951 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030952 }
30953 }
30954 }
30955
30956 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__XOP, n_div_4_subtile) {
30957 TEST_REQUIRES_X86_XOP;
30958 for (uint32_t n = 8; n <= 12; n += 4) {
30959 for (size_t k = 1; k <= 40; k += 9) {
30960 for (uint32_t m = 1; m <= 1; m++) {
30961 GemmMicrokernelTester()
30962 .extended_weights(true)
30963 .mr(1)
30964 .nr(4)
30965 .kr(2)
30966 .sr(1)
30967 .m(m)
30968 .n(n)
30969 .k(k)
30970 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030971 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030972 }
30973 }
30974 }
30975 }
30976
30977 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__XOP, strided_cm_subtile) {
30978 TEST_REQUIRES_X86_XOP;
30979 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030980 for (uint32_t n = 1; n <= 4; n++) {
30981 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070030982 GemmMicrokernelTester()
30983 .extended_weights(true)
30984 .mr(1)
30985 .nr(4)
30986 .kr(2)
30987 .sr(1)
30988 .m(m)
30989 .n(n)
30990 .k(k)
30991 .cm_stride(7)
30992 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030993 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070030994 }
30995 }
30996 }
30997 }
30998
30999 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__XOP, strided_cm) {
31000 TEST_REQUIRES_X86_XOP;
31001 GemmMicrokernelTester()
31002 .extended_weights(true)
31003 .mr(1)
31004 .nr(4)
31005 .kr(2)
31006 .sr(1)
31007 .m(1)
31008 .n(4)
31009 .k(8)
31010 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031011 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031012 }
31013#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
31014
31015
31016#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan0ff79892021-08-06 16:05:06 -070031017 TEST(QS8_GEMM_XW_MINMAX_FP32_4X4C2__XOP, k_eq_8) {
31018 TEST_REQUIRES_X86_XOP;
31019 GemmMicrokernelTester()
31020 .extended_weights(true)
31021 .mr(4)
31022 .nr(4)
31023 .kr(2)
31024 .sr(1)
31025 .m(4)
31026 .n(4)
31027 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080031028 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031029 }
31030
31031 TEST(QS8_GEMM_XW_MINMAX_FP32_4X4C2__XOP, strided_cn) {
31032 TEST_REQUIRES_X86_XOP;
31033 GemmMicrokernelTester()
31034 .extended_weights(true)
31035 .mr(4)
31036 .nr(4)
31037 .kr(2)
31038 .sr(1)
31039 .m(4)
31040 .n(4)
31041 .k(8)
31042 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031043 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031044 }
31045
31046 TEST(QS8_GEMM_XW_MINMAX_FP32_4X4C2__XOP, k_eq_8_strided_a) {
31047 TEST_REQUIRES_X86_XOP;
31048 GemmMicrokernelTester()
31049 .extended_weights(true)
31050 .mr(4)
31051 .nr(4)
31052 .kr(2)
31053 .sr(1)
31054 .m(4)
31055 .n(4)
31056 .k(8)
31057 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080031058 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031059 }
31060
31061 TEST(QS8_GEMM_XW_MINMAX_FP32_4X4C2__XOP, k_eq_8_subtile) {
31062 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080031063 for (uint32_t n = 1; n <= 4; n++) {
31064 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070031065 GemmMicrokernelTester()
31066 .extended_weights(true)
31067 .mr(4)
31068 .nr(4)
31069 .kr(2)
31070 .sr(1)
31071 .m(m)
31072 .n(n)
31073 .k(8)
31074 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031075 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031076 }
31077 }
31078 }
31079
31080 TEST(QS8_GEMM_XW_MINMAX_FP32_4X4C2__XOP, k_eq_8_subtile_m) {
31081 TEST_REQUIRES_X86_XOP;
31082 for (uint32_t m = 1; m <= 4; m++) {
31083 GemmMicrokernelTester()
31084 .extended_weights(true)
31085 .mr(4)
31086 .nr(4)
31087 .kr(2)
31088 .sr(1)
31089 .m(m)
31090 .n(4)
31091 .k(8)
31092 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031093 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031094 }
31095 }
31096
31097 TEST(QS8_GEMM_XW_MINMAX_FP32_4X4C2__XOP, k_eq_8_subtile_n) {
31098 TEST_REQUIRES_X86_XOP;
31099 for (uint32_t n = 1; n <= 4; n++) {
31100 GemmMicrokernelTester()
31101 .extended_weights(true)
31102 .mr(4)
31103 .nr(4)
31104 .kr(2)
31105 .sr(1)
31106 .m(4)
31107 .n(n)
31108 .k(8)
31109 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031110 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031111 }
31112 }
31113
31114 TEST(QS8_GEMM_XW_MINMAX_FP32_4X4C2__XOP, k_lt_8) {
31115 TEST_REQUIRES_X86_XOP;
31116 for (size_t k = 1; k < 8; k++) {
31117 GemmMicrokernelTester()
31118 .extended_weights(true)
31119 .mr(4)
31120 .nr(4)
31121 .kr(2)
31122 .sr(1)
31123 .m(4)
31124 .n(4)
31125 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031126 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031127 }
31128 }
31129
31130 TEST(QS8_GEMM_XW_MINMAX_FP32_4X4C2__XOP, k_lt_8_strided_a) {
31131 TEST_REQUIRES_X86_XOP;
31132 for (size_t k = 1; k < 8; k++) {
31133 GemmMicrokernelTester()
31134 .extended_weights(true)
31135 .mr(4)
31136 .nr(4)
31137 .kr(2)
31138 .sr(1)
31139 .m(4)
31140 .n(4)
31141 .k(k)
31142 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080031143 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031144 }
31145 }
31146
31147 TEST(QS8_GEMM_XW_MINMAX_FP32_4X4C2__XOP, k_lt_8_subtile) {
31148 TEST_REQUIRES_X86_XOP;
31149 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031150 for (uint32_t n = 1; n <= 4; n++) {
31151 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070031152 GemmMicrokernelTester()
31153 .extended_weights(true)
31154 .mr(4)
31155 .nr(4)
31156 .kr(2)
31157 .sr(1)
31158 .m(m)
31159 .n(n)
31160 .k(k)
31161 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031162 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031163 }
31164 }
31165 }
31166 }
31167
31168 TEST(QS8_GEMM_XW_MINMAX_FP32_4X4C2__XOP, k_gt_8) {
31169 TEST_REQUIRES_X86_XOP;
31170 for (size_t k = 9; k < 16; k++) {
31171 GemmMicrokernelTester()
31172 .extended_weights(true)
31173 .mr(4)
31174 .nr(4)
31175 .kr(2)
31176 .sr(1)
31177 .m(4)
31178 .n(4)
31179 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031180 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031181 }
31182 }
31183
31184 TEST(QS8_GEMM_XW_MINMAX_FP32_4X4C2__XOP, k_gt_8_strided_a) {
31185 TEST_REQUIRES_X86_XOP;
31186 for (size_t k = 9; k < 16; k++) {
31187 GemmMicrokernelTester()
31188 .extended_weights(true)
31189 .mr(4)
31190 .nr(4)
31191 .kr(2)
31192 .sr(1)
31193 .m(4)
31194 .n(4)
31195 .k(k)
31196 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080031197 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031198 }
31199 }
31200
31201 TEST(QS8_GEMM_XW_MINMAX_FP32_4X4C2__XOP, k_gt_8_subtile) {
31202 TEST_REQUIRES_X86_XOP;
31203 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031204 for (uint32_t n = 1; n <= 4; n++) {
31205 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070031206 GemmMicrokernelTester()
31207 .extended_weights(true)
31208 .mr(4)
31209 .nr(4)
31210 .kr(2)
31211 .sr(1)
31212 .m(m)
31213 .n(n)
31214 .k(k)
31215 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031216 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031217 }
31218 }
31219 }
31220 }
31221
31222 TEST(QS8_GEMM_XW_MINMAX_FP32_4X4C2__XOP, k_div_8) {
31223 TEST_REQUIRES_X86_XOP;
31224 for (size_t k = 16; k <= 80; k += 8) {
31225 GemmMicrokernelTester()
31226 .extended_weights(true)
31227 .mr(4)
31228 .nr(4)
31229 .kr(2)
31230 .sr(1)
31231 .m(4)
31232 .n(4)
31233 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031234 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031235 }
31236 }
31237
31238 TEST(QS8_GEMM_XW_MINMAX_FP32_4X4C2__XOP, k_div_8_strided_a) {
31239 TEST_REQUIRES_X86_XOP;
31240 for (size_t k = 16; k <= 80; k += 8) {
31241 GemmMicrokernelTester()
31242 .extended_weights(true)
31243 .mr(4)
31244 .nr(4)
31245 .kr(2)
31246 .sr(1)
31247 .m(4)
31248 .n(4)
31249 .k(k)
31250 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080031251 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031252 }
31253 }
31254
31255 TEST(QS8_GEMM_XW_MINMAX_FP32_4X4C2__XOP, k_div_8_subtile) {
31256 TEST_REQUIRES_X86_XOP;
31257 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031258 for (uint32_t n = 1; n <= 4; n++) {
31259 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070031260 GemmMicrokernelTester()
31261 .extended_weights(true)
31262 .mr(4)
31263 .nr(4)
31264 .kr(2)
31265 .sr(1)
31266 .m(m)
31267 .n(n)
31268 .k(k)
31269 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031270 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031271 }
31272 }
31273 }
31274 }
31275
31276 TEST(QS8_GEMM_XW_MINMAX_FP32_4X4C2__XOP, n_gt_4) {
31277 TEST_REQUIRES_X86_XOP;
31278 for (uint32_t n = 5; n < 8; n++) {
31279 for (size_t k = 1; k <= 40; k += 9) {
31280 GemmMicrokernelTester()
31281 .extended_weights(true)
31282 .mr(4)
31283 .nr(4)
31284 .kr(2)
31285 .sr(1)
31286 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031287 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070031288 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031289 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031290 }
31291 }
31292 }
31293
31294 TEST(QS8_GEMM_XW_MINMAX_FP32_4X4C2__XOP, n_gt_4_strided_cn) {
31295 TEST_REQUIRES_X86_XOP;
31296 for (uint32_t n = 5; n < 8; n++) {
31297 for (size_t k = 1; k <= 40; k += 9) {
31298 GemmMicrokernelTester()
31299 .extended_weights(true)
31300 .mr(4)
31301 .nr(4)
31302 .kr(2)
31303 .sr(1)
31304 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031305 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070031306 .k(k)
31307 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031308 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031309 }
31310 }
31311 }
31312
31313 TEST(QS8_GEMM_XW_MINMAX_FP32_4X4C2__XOP, n_gt_4_strided_a) {
31314 TEST_REQUIRES_X86_XOP;
31315 for (uint32_t n = 5; n < 8; n++) {
31316 for (size_t k = 1; k <= 40; k += 9) {
31317 GemmMicrokernelTester()
31318 .extended_weights(true)
31319 .mr(4)
31320 .nr(4)
31321 .kr(2)
31322 .sr(1)
31323 .m(4)
31324 .n(n)
31325 .k(k)
31326 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080031327 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031328 }
31329 }
31330 }
31331
31332 TEST(QS8_GEMM_XW_MINMAX_FP32_4X4C2__XOP, n_gt_4_subtile) {
31333 TEST_REQUIRES_X86_XOP;
31334 for (uint32_t n = 5; n < 8; n++) {
31335 for (size_t k = 1; k <= 40; k += 9) {
31336 for (uint32_t m = 1; m <= 4; m++) {
31337 GemmMicrokernelTester()
31338 .extended_weights(true)
31339 .mr(4)
31340 .nr(4)
31341 .kr(2)
31342 .sr(1)
31343 .m(m)
31344 .n(n)
31345 .k(k)
31346 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031347 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031348 }
31349 }
31350 }
31351 }
31352
31353 TEST(QS8_GEMM_XW_MINMAX_FP32_4X4C2__XOP, n_div_4) {
31354 TEST_REQUIRES_X86_XOP;
31355 for (uint32_t n = 8; n <= 12; n += 4) {
31356 for (size_t k = 1; k <= 40; k += 9) {
31357 GemmMicrokernelTester()
31358 .extended_weights(true)
31359 .mr(4)
31360 .nr(4)
31361 .kr(2)
31362 .sr(1)
31363 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031364 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070031365 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031366 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031367 }
31368 }
31369 }
31370
31371 TEST(QS8_GEMM_XW_MINMAX_FP32_4X4C2__XOP, n_div_4_strided_cn) {
31372 TEST_REQUIRES_X86_XOP;
31373 for (uint32_t n = 8; n <= 12; n += 4) {
31374 for (size_t k = 1; k <= 40; k += 9) {
31375 GemmMicrokernelTester()
31376 .extended_weights(true)
31377 .mr(4)
31378 .nr(4)
31379 .kr(2)
31380 .sr(1)
31381 .m(4)
31382 .n(n)
31383 .k(k)
31384 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031385 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031386 }
31387 }
31388 }
31389
31390 TEST(QS8_GEMM_XW_MINMAX_FP32_4X4C2__XOP, n_div_4_strided_a) {
31391 TEST_REQUIRES_X86_XOP;
31392 for (uint32_t n = 8; n <= 12; n += 4) {
31393 for (size_t k = 1; k <= 40; k += 9) {
31394 GemmMicrokernelTester()
31395 .extended_weights(true)
31396 .mr(4)
31397 .nr(4)
31398 .kr(2)
31399 .sr(1)
31400 .m(4)
31401 .n(n)
31402 .k(k)
31403 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080031404 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031405 }
31406 }
31407 }
31408
31409 TEST(QS8_GEMM_XW_MINMAX_FP32_4X4C2__XOP, n_div_4_subtile) {
31410 TEST_REQUIRES_X86_XOP;
31411 for (uint32_t n = 8; n <= 12; n += 4) {
31412 for (size_t k = 1; k <= 40; k += 9) {
31413 for (uint32_t m = 1; m <= 4; m++) {
31414 GemmMicrokernelTester()
31415 .extended_weights(true)
31416 .mr(4)
31417 .nr(4)
31418 .kr(2)
31419 .sr(1)
31420 .m(m)
31421 .n(n)
31422 .k(k)
31423 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031424 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031425 }
31426 }
31427 }
31428 }
31429
31430 TEST(QS8_GEMM_XW_MINMAX_FP32_4X4C2__XOP, strided_cm_subtile) {
31431 TEST_REQUIRES_X86_XOP;
31432 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031433 for (uint32_t n = 1; n <= 4; n++) {
31434 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070031435 GemmMicrokernelTester()
31436 .extended_weights(true)
31437 .mr(4)
31438 .nr(4)
31439 .kr(2)
31440 .sr(1)
31441 .m(m)
31442 .n(n)
31443 .k(k)
31444 .cm_stride(7)
31445 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031446 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031447 }
31448 }
31449 }
31450 }
31451
31452 TEST(QS8_GEMM_XW_MINMAX_FP32_4X4C2__XOP, strided_cm) {
31453 TEST_REQUIRES_X86_XOP;
31454 GemmMicrokernelTester()
31455 .extended_weights(true)
31456 .mr(4)
31457 .nr(4)
31458 .kr(2)
31459 .sr(1)
31460 .m(4)
31461 .n(4)
31462 .k(8)
31463 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031464 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031465 }
31466#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
31467
31468
31469#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan0ff79892021-08-06 16:05:06 -070031470 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSE2, k_eq_8) {
31471 TEST_REQUIRES_X86_SSE2;
31472 GemmMicrokernelTester()
31473 .extended_weights(true)
31474 .mr(2)
31475 .nr(4)
31476 .kr(8)
31477 .sr(1)
31478 .m(2)
31479 .n(4)
31480 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080031481 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031482 }
31483
31484 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSE2, strided_cn) {
31485 TEST_REQUIRES_X86_SSE2;
31486 GemmMicrokernelTester()
31487 .extended_weights(true)
31488 .mr(2)
31489 .nr(4)
31490 .kr(8)
31491 .sr(1)
31492 .m(2)
31493 .n(4)
31494 .k(8)
31495 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031496 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031497 }
31498
31499 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSE2, k_eq_8_strided_a) {
31500 TEST_REQUIRES_X86_SSE2;
31501 GemmMicrokernelTester()
31502 .extended_weights(true)
31503 .mr(2)
31504 .nr(4)
31505 .kr(8)
31506 .sr(1)
31507 .m(2)
31508 .n(4)
31509 .k(8)
31510 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080031511 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031512 }
31513
31514 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSE2, k_eq_8_subtile) {
31515 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080031516 for (uint32_t n = 1; n <= 4; n++) {
31517 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070031518 GemmMicrokernelTester()
31519 .extended_weights(true)
31520 .mr(2)
31521 .nr(4)
31522 .kr(8)
31523 .sr(1)
31524 .m(m)
31525 .n(n)
31526 .k(8)
31527 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031528 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031529 }
31530 }
31531 }
31532
31533 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSE2, k_eq_8_subtile_m) {
31534 TEST_REQUIRES_X86_SSE2;
31535 for (uint32_t m = 1; m <= 2; m++) {
31536 GemmMicrokernelTester()
31537 .extended_weights(true)
31538 .mr(2)
31539 .nr(4)
31540 .kr(8)
31541 .sr(1)
31542 .m(m)
31543 .n(4)
31544 .k(8)
31545 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031546 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031547 }
31548 }
31549
31550 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSE2, k_eq_8_subtile_n) {
31551 TEST_REQUIRES_X86_SSE2;
31552 for (uint32_t n = 1; n <= 4; n++) {
31553 GemmMicrokernelTester()
31554 .extended_weights(true)
31555 .mr(2)
31556 .nr(4)
31557 .kr(8)
31558 .sr(1)
31559 .m(2)
31560 .n(n)
31561 .k(8)
31562 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031563 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031564 }
31565 }
31566
31567 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSE2, k_lt_8) {
31568 TEST_REQUIRES_X86_SSE2;
31569 for (size_t k = 1; k < 8; k++) {
31570 GemmMicrokernelTester()
31571 .extended_weights(true)
31572 .mr(2)
31573 .nr(4)
31574 .kr(8)
31575 .sr(1)
31576 .m(2)
31577 .n(4)
31578 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031579 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031580 }
31581 }
31582
31583 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSE2, k_lt_8_strided_a) {
31584 TEST_REQUIRES_X86_SSE2;
31585 for (size_t k = 1; k < 8; k++) {
31586 GemmMicrokernelTester()
31587 .extended_weights(true)
31588 .mr(2)
31589 .nr(4)
31590 .kr(8)
31591 .sr(1)
31592 .m(2)
31593 .n(4)
31594 .k(k)
31595 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080031596 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031597 }
31598 }
31599
31600 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSE2, k_lt_8_subtile) {
31601 TEST_REQUIRES_X86_SSE2;
31602 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031603 for (uint32_t n = 1; n <= 4; n++) {
31604 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070031605 GemmMicrokernelTester()
31606 .extended_weights(true)
31607 .mr(2)
31608 .nr(4)
31609 .kr(8)
31610 .sr(1)
31611 .m(m)
31612 .n(n)
31613 .k(k)
31614 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031615 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031616 }
31617 }
31618 }
31619 }
31620
31621 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSE2, k_gt_8) {
31622 TEST_REQUIRES_X86_SSE2;
31623 for (size_t k = 9; k < 16; k++) {
31624 GemmMicrokernelTester()
31625 .extended_weights(true)
31626 .mr(2)
31627 .nr(4)
31628 .kr(8)
31629 .sr(1)
31630 .m(2)
31631 .n(4)
31632 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031633 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031634 }
31635 }
31636
31637 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSE2, k_gt_8_strided_a) {
31638 TEST_REQUIRES_X86_SSE2;
31639 for (size_t k = 9; k < 16; k++) {
31640 GemmMicrokernelTester()
31641 .extended_weights(true)
31642 .mr(2)
31643 .nr(4)
31644 .kr(8)
31645 .sr(1)
31646 .m(2)
31647 .n(4)
31648 .k(k)
31649 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080031650 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031651 }
31652 }
31653
31654 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSE2, k_gt_8_subtile) {
31655 TEST_REQUIRES_X86_SSE2;
31656 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031657 for (uint32_t n = 1; n <= 4; n++) {
31658 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070031659 GemmMicrokernelTester()
31660 .extended_weights(true)
31661 .mr(2)
31662 .nr(4)
31663 .kr(8)
31664 .sr(1)
31665 .m(m)
31666 .n(n)
31667 .k(k)
31668 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031669 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031670 }
31671 }
31672 }
31673 }
31674
31675 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSE2, k_div_8) {
31676 TEST_REQUIRES_X86_SSE2;
31677 for (size_t k = 16; k <= 80; k += 8) {
31678 GemmMicrokernelTester()
31679 .extended_weights(true)
31680 .mr(2)
31681 .nr(4)
31682 .kr(8)
31683 .sr(1)
31684 .m(2)
31685 .n(4)
31686 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031687 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031688 }
31689 }
31690
31691 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSE2, k_div_8_strided_a) {
31692 TEST_REQUIRES_X86_SSE2;
31693 for (size_t k = 16; k <= 80; k += 8) {
31694 GemmMicrokernelTester()
31695 .extended_weights(true)
31696 .mr(2)
31697 .nr(4)
31698 .kr(8)
31699 .sr(1)
31700 .m(2)
31701 .n(4)
31702 .k(k)
31703 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080031704 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031705 }
31706 }
31707
31708 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSE2, k_div_8_subtile) {
31709 TEST_REQUIRES_X86_SSE2;
31710 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031711 for (uint32_t n = 1; n <= 4; n++) {
31712 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070031713 GemmMicrokernelTester()
31714 .extended_weights(true)
31715 .mr(2)
31716 .nr(4)
31717 .kr(8)
31718 .sr(1)
31719 .m(m)
31720 .n(n)
31721 .k(k)
31722 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031723 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031724 }
31725 }
31726 }
31727 }
31728
31729 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSE2, n_gt_4) {
31730 TEST_REQUIRES_X86_SSE2;
31731 for (uint32_t n = 5; n < 8; n++) {
31732 for (size_t k = 1; k <= 40; k += 9) {
31733 GemmMicrokernelTester()
31734 .extended_weights(true)
31735 .mr(2)
31736 .nr(4)
31737 .kr(8)
31738 .sr(1)
31739 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031740 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070031741 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031742 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031743 }
31744 }
31745 }
31746
31747 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSE2, n_gt_4_strided_cn) {
31748 TEST_REQUIRES_X86_SSE2;
31749 for (uint32_t n = 5; n < 8; n++) {
31750 for (size_t k = 1; k <= 40; k += 9) {
31751 GemmMicrokernelTester()
31752 .extended_weights(true)
31753 .mr(2)
31754 .nr(4)
31755 .kr(8)
31756 .sr(1)
31757 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031758 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070031759 .k(k)
31760 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031761 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031762 }
31763 }
31764 }
31765
31766 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSE2, n_gt_4_strided_a) {
31767 TEST_REQUIRES_X86_SSE2;
31768 for (uint32_t n = 5; n < 8; n++) {
31769 for (size_t k = 1; k <= 40; k += 9) {
31770 GemmMicrokernelTester()
31771 .extended_weights(true)
31772 .mr(2)
31773 .nr(4)
31774 .kr(8)
31775 .sr(1)
31776 .m(2)
31777 .n(n)
31778 .k(k)
31779 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080031780 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031781 }
31782 }
31783 }
31784
31785 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSE2, n_gt_4_subtile) {
31786 TEST_REQUIRES_X86_SSE2;
31787 for (uint32_t n = 5; n < 8; n++) {
31788 for (size_t k = 1; k <= 40; k += 9) {
31789 for (uint32_t m = 1; m <= 2; m++) {
31790 GemmMicrokernelTester()
31791 .extended_weights(true)
31792 .mr(2)
31793 .nr(4)
31794 .kr(8)
31795 .sr(1)
31796 .m(m)
31797 .n(n)
31798 .k(k)
31799 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031800 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031801 }
31802 }
31803 }
31804 }
31805
31806 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSE2, n_div_4) {
31807 TEST_REQUIRES_X86_SSE2;
31808 for (uint32_t n = 8; n <= 12; n += 4) {
31809 for (size_t k = 1; k <= 40; k += 9) {
31810 GemmMicrokernelTester()
31811 .extended_weights(true)
31812 .mr(2)
31813 .nr(4)
31814 .kr(8)
31815 .sr(1)
31816 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031817 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070031818 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031819 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031820 }
31821 }
31822 }
31823
31824 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSE2, n_div_4_strided_cn) {
31825 TEST_REQUIRES_X86_SSE2;
31826 for (uint32_t n = 8; n <= 12; n += 4) {
31827 for (size_t k = 1; k <= 40; k += 9) {
31828 GemmMicrokernelTester()
31829 .extended_weights(true)
31830 .mr(2)
31831 .nr(4)
31832 .kr(8)
31833 .sr(1)
31834 .m(2)
31835 .n(n)
31836 .k(k)
31837 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031838 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031839 }
31840 }
31841 }
31842
31843 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSE2, n_div_4_strided_a) {
31844 TEST_REQUIRES_X86_SSE2;
31845 for (uint32_t n = 8; n <= 12; n += 4) {
31846 for (size_t k = 1; k <= 40; k += 9) {
31847 GemmMicrokernelTester()
31848 .extended_weights(true)
31849 .mr(2)
31850 .nr(4)
31851 .kr(8)
31852 .sr(1)
31853 .m(2)
31854 .n(n)
31855 .k(k)
31856 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080031857 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031858 }
31859 }
31860 }
31861
31862 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSE2, n_div_4_subtile) {
31863 TEST_REQUIRES_X86_SSE2;
31864 for (uint32_t n = 8; n <= 12; n += 4) {
31865 for (size_t k = 1; k <= 40; k += 9) {
31866 for (uint32_t m = 1; m <= 2; m++) {
31867 GemmMicrokernelTester()
31868 .extended_weights(true)
31869 .mr(2)
31870 .nr(4)
31871 .kr(8)
31872 .sr(1)
31873 .m(m)
31874 .n(n)
31875 .k(k)
31876 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031877 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031878 }
31879 }
31880 }
31881 }
31882
31883 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSE2, strided_cm_subtile) {
31884 TEST_REQUIRES_X86_SSE2;
31885 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031886 for (uint32_t n = 1; n <= 4; n++) {
31887 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070031888 GemmMicrokernelTester()
31889 .extended_weights(true)
31890 .mr(2)
31891 .nr(4)
31892 .kr(8)
31893 .sr(1)
31894 .m(m)
31895 .n(n)
31896 .k(k)
31897 .cm_stride(7)
31898 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031899 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031900 }
31901 }
31902 }
31903 }
31904
31905 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSE2, strided_cm) {
31906 TEST_REQUIRES_X86_SSE2;
31907 GemmMicrokernelTester()
31908 .extended_weights(true)
31909 .mr(2)
31910 .nr(4)
31911 .kr(8)
31912 .sr(1)
31913 .m(2)
31914 .n(4)
31915 .k(8)
31916 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031917 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031918 }
31919#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
31920
31921
31922#if XNN_ARCH_X86 || XNN_ARCH_X86_64
31923 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE2, k_eq_8) {
31924 TEST_REQUIRES_X86_SSE2;
31925 GemmMicrokernelTester()
31926 .extended_weights(true)
31927 .mr(3)
31928 .nr(4)
31929 .kr(8)
31930 .sr(1)
31931 .m(3)
31932 .n(4)
31933 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080031934 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031935 }
31936
31937 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE2, strided_cn) {
31938 TEST_REQUIRES_X86_SSE2;
31939 GemmMicrokernelTester()
31940 .extended_weights(true)
31941 .mr(3)
31942 .nr(4)
31943 .kr(8)
31944 .sr(1)
31945 .m(3)
31946 .n(4)
31947 .k(8)
31948 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031949 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031950 }
31951
31952 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE2, k_eq_8_strided_a) {
31953 TEST_REQUIRES_X86_SSE2;
31954 GemmMicrokernelTester()
31955 .extended_weights(true)
31956 .mr(3)
31957 .nr(4)
31958 .kr(8)
31959 .sr(1)
31960 .m(3)
31961 .n(4)
31962 .k(8)
31963 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080031964 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031965 }
31966
31967 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE2, k_eq_8_subtile) {
31968 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080031969 for (uint32_t n = 1; n <= 4; n++) {
31970 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070031971 GemmMicrokernelTester()
31972 .extended_weights(true)
31973 .mr(3)
31974 .nr(4)
31975 .kr(8)
31976 .sr(1)
31977 .m(m)
31978 .n(n)
31979 .k(8)
31980 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031981 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070031982 }
31983 }
31984 }
31985
31986 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE2, k_eq_8_subtile_m) {
31987 TEST_REQUIRES_X86_SSE2;
31988 for (uint32_t m = 1; m <= 3; m++) {
31989 GemmMicrokernelTester()
31990 .extended_weights(true)
31991 .mr(3)
31992 .nr(4)
31993 .kr(8)
31994 .sr(1)
31995 .m(m)
31996 .n(4)
31997 .k(8)
31998 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031999 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032000 }
32001 }
32002
32003 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE2, k_eq_8_subtile_n) {
32004 TEST_REQUIRES_X86_SSE2;
32005 for (uint32_t n = 1; n <= 4; n++) {
32006 GemmMicrokernelTester()
32007 .extended_weights(true)
32008 .mr(3)
32009 .nr(4)
32010 .kr(8)
32011 .sr(1)
32012 .m(3)
32013 .n(n)
32014 .k(8)
32015 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032016 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032017 }
32018 }
32019
32020 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE2, k_lt_8) {
32021 TEST_REQUIRES_X86_SSE2;
32022 for (size_t k = 1; k < 8; k++) {
32023 GemmMicrokernelTester()
32024 .extended_weights(true)
32025 .mr(3)
32026 .nr(4)
32027 .kr(8)
32028 .sr(1)
32029 .m(3)
32030 .n(4)
32031 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032032 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032033 }
32034 }
32035
32036 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE2, k_lt_8_strided_a) {
32037 TEST_REQUIRES_X86_SSE2;
32038 for (size_t k = 1; k < 8; k++) {
32039 GemmMicrokernelTester()
32040 .extended_weights(true)
32041 .mr(3)
32042 .nr(4)
32043 .kr(8)
32044 .sr(1)
32045 .m(3)
32046 .n(4)
32047 .k(k)
32048 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080032049 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032050 }
32051 }
32052
32053 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE2, k_lt_8_subtile) {
32054 TEST_REQUIRES_X86_SSE2;
32055 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032056 for (uint32_t n = 1; n <= 4; n++) {
32057 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070032058 GemmMicrokernelTester()
32059 .extended_weights(true)
32060 .mr(3)
32061 .nr(4)
32062 .kr(8)
32063 .sr(1)
32064 .m(m)
32065 .n(n)
32066 .k(k)
32067 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032068 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032069 }
32070 }
32071 }
32072 }
32073
32074 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE2, k_gt_8) {
32075 TEST_REQUIRES_X86_SSE2;
32076 for (size_t k = 9; k < 16; k++) {
32077 GemmMicrokernelTester()
32078 .extended_weights(true)
32079 .mr(3)
32080 .nr(4)
32081 .kr(8)
32082 .sr(1)
32083 .m(3)
32084 .n(4)
32085 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032086 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032087 }
32088 }
32089
32090 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE2, k_gt_8_strided_a) {
32091 TEST_REQUIRES_X86_SSE2;
32092 for (size_t k = 9; k < 16; k++) {
32093 GemmMicrokernelTester()
32094 .extended_weights(true)
32095 .mr(3)
32096 .nr(4)
32097 .kr(8)
32098 .sr(1)
32099 .m(3)
32100 .n(4)
32101 .k(k)
32102 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080032103 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032104 }
32105 }
32106
32107 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE2, k_gt_8_subtile) {
32108 TEST_REQUIRES_X86_SSE2;
32109 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032110 for (uint32_t n = 1; n <= 4; n++) {
32111 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070032112 GemmMicrokernelTester()
32113 .extended_weights(true)
32114 .mr(3)
32115 .nr(4)
32116 .kr(8)
32117 .sr(1)
32118 .m(m)
32119 .n(n)
32120 .k(k)
32121 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032122 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032123 }
32124 }
32125 }
32126 }
32127
32128 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE2, k_div_8) {
32129 TEST_REQUIRES_X86_SSE2;
32130 for (size_t k = 16; k <= 80; k += 8) {
32131 GemmMicrokernelTester()
32132 .extended_weights(true)
32133 .mr(3)
32134 .nr(4)
32135 .kr(8)
32136 .sr(1)
32137 .m(3)
32138 .n(4)
32139 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032140 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032141 }
32142 }
32143
32144 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE2, k_div_8_strided_a) {
32145 TEST_REQUIRES_X86_SSE2;
32146 for (size_t k = 16; k <= 80; k += 8) {
32147 GemmMicrokernelTester()
32148 .extended_weights(true)
32149 .mr(3)
32150 .nr(4)
32151 .kr(8)
32152 .sr(1)
32153 .m(3)
32154 .n(4)
32155 .k(k)
32156 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080032157 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032158 }
32159 }
32160
32161 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE2, k_div_8_subtile) {
32162 TEST_REQUIRES_X86_SSE2;
32163 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032164 for (uint32_t n = 1; n <= 4; n++) {
32165 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070032166 GemmMicrokernelTester()
32167 .extended_weights(true)
32168 .mr(3)
32169 .nr(4)
32170 .kr(8)
32171 .sr(1)
32172 .m(m)
32173 .n(n)
32174 .k(k)
32175 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032176 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032177 }
32178 }
32179 }
32180 }
32181
32182 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE2, n_gt_4) {
32183 TEST_REQUIRES_X86_SSE2;
32184 for (uint32_t n = 5; n < 8; n++) {
32185 for (size_t k = 1; k <= 40; k += 9) {
32186 GemmMicrokernelTester()
32187 .extended_weights(true)
32188 .mr(3)
32189 .nr(4)
32190 .kr(8)
32191 .sr(1)
32192 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032193 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070032194 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032195 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032196 }
32197 }
32198 }
32199
32200 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE2, n_gt_4_strided_cn) {
32201 TEST_REQUIRES_X86_SSE2;
32202 for (uint32_t n = 5; n < 8; n++) {
32203 for (size_t k = 1; k <= 40; k += 9) {
32204 GemmMicrokernelTester()
32205 .extended_weights(true)
32206 .mr(3)
32207 .nr(4)
32208 .kr(8)
32209 .sr(1)
32210 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032211 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070032212 .k(k)
32213 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080032214 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032215 }
32216 }
32217 }
32218
32219 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE2, n_gt_4_strided_a) {
32220 TEST_REQUIRES_X86_SSE2;
32221 for (uint32_t n = 5; n < 8; n++) {
32222 for (size_t k = 1; k <= 40; k += 9) {
32223 GemmMicrokernelTester()
32224 .extended_weights(true)
32225 .mr(3)
32226 .nr(4)
32227 .kr(8)
32228 .sr(1)
32229 .m(3)
32230 .n(n)
32231 .k(k)
32232 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080032233 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032234 }
32235 }
32236 }
32237
32238 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE2, n_gt_4_subtile) {
32239 TEST_REQUIRES_X86_SSE2;
32240 for (uint32_t n = 5; n < 8; n++) {
32241 for (size_t k = 1; k <= 40; k += 9) {
32242 for (uint32_t m = 1; m <= 3; m++) {
32243 GemmMicrokernelTester()
32244 .extended_weights(true)
32245 .mr(3)
32246 .nr(4)
32247 .kr(8)
32248 .sr(1)
32249 .m(m)
32250 .n(n)
32251 .k(k)
32252 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032253 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032254 }
32255 }
32256 }
32257 }
32258
32259 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE2, n_div_4) {
32260 TEST_REQUIRES_X86_SSE2;
32261 for (uint32_t n = 8; n <= 12; n += 4) {
32262 for (size_t k = 1; k <= 40; k += 9) {
32263 GemmMicrokernelTester()
32264 .extended_weights(true)
32265 .mr(3)
32266 .nr(4)
32267 .kr(8)
32268 .sr(1)
32269 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032270 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070032271 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032272 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032273 }
32274 }
32275 }
32276
32277 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE2, n_div_4_strided_cn) {
32278 TEST_REQUIRES_X86_SSE2;
32279 for (uint32_t n = 8; n <= 12; n += 4) {
32280 for (size_t k = 1; k <= 40; k += 9) {
32281 GemmMicrokernelTester()
32282 .extended_weights(true)
32283 .mr(3)
32284 .nr(4)
32285 .kr(8)
32286 .sr(1)
32287 .m(3)
32288 .n(n)
32289 .k(k)
32290 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080032291 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032292 }
32293 }
32294 }
32295
32296 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE2, n_div_4_strided_a) {
32297 TEST_REQUIRES_X86_SSE2;
32298 for (uint32_t n = 8; n <= 12; n += 4) {
32299 for (size_t k = 1; k <= 40; k += 9) {
32300 GemmMicrokernelTester()
32301 .extended_weights(true)
32302 .mr(3)
32303 .nr(4)
32304 .kr(8)
32305 .sr(1)
32306 .m(3)
32307 .n(n)
32308 .k(k)
32309 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080032310 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032311 }
32312 }
32313 }
32314
32315 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE2, n_div_4_subtile) {
32316 TEST_REQUIRES_X86_SSE2;
32317 for (uint32_t n = 8; n <= 12; n += 4) {
32318 for (size_t k = 1; k <= 40; k += 9) {
32319 for (uint32_t m = 1; m <= 3; m++) {
32320 GemmMicrokernelTester()
32321 .extended_weights(true)
32322 .mr(3)
32323 .nr(4)
32324 .kr(8)
32325 .sr(1)
32326 .m(m)
32327 .n(n)
32328 .k(k)
32329 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032330 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032331 }
32332 }
32333 }
32334 }
32335
32336 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE2, strided_cm_subtile) {
32337 TEST_REQUIRES_X86_SSE2;
32338 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032339 for (uint32_t n = 1; n <= 4; n++) {
32340 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070032341 GemmMicrokernelTester()
32342 .extended_weights(true)
32343 .mr(3)
32344 .nr(4)
32345 .kr(8)
32346 .sr(1)
32347 .m(m)
32348 .n(n)
32349 .k(k)
32350 .cm_stride(7)
32351 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032352 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032353 }
32354 }
32355 }
32356 }
32357
32358 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE2, strided_cm) {
32359 TEST_REQUIRES_X86_SSE2;
32360 GemmMicrokernelTester()
32361 .extended_weights(true)
32362 .mr(3)
32363 .nr(4)
32364 .kr(8)
32365 .sr(1)
32366 .m(3)
32367 .n(4)
32368 .k(8)
32369 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080032370 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032371 }
32372#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
32373
32374
32375#if XNN_ARCH_X86 || XNN_ARCH_X86_64
32376 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__SSSE3, k_eq_8) {
32377 TEST_REQUIRES_X86_SSSE3;
32378 GemmMicrokernelTester()
32379 .extended_weights(true)
32380 .mr(1)
32381 .nr(4)
32382 .kr(8)
32383 .sr(1)
32384 .m(1)
32385 .n(4)
32386 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080032387 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032388 }
32389
32390 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__SSSE3, strided_cn) {
32391 TEST_REQUIRES_X86_SSSE3;
32392 GemmMicrokernelTester()
32393 .extended_weights(true)
32394 .mr(1)
32395 .nr(4)
32396 .kr(8)
32397 .sr(1)
32398 .m(1)
32399 .n(4)
32400 .k(8)
32401 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080032402 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032403 }
32404
32405 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__SSSE3, k_eq_8_strided_a) {
32406 TEST_REQUIRES_X86_SSSE3;
32407 GemmMicrokernelTester()
32408 .extended_weights(true)
32409 .mr(1)
32410 .nr(4)
32411 .kr(8)
32412 .sr(1)
32413 .m(1)
32414 .n(4)
32415 .k(8)
32416 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080032417 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032418 }
32419
32420 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__SSSE3, k_eq_8_subtile) {
32421 TEST_REQUIRES_X86_SSSE3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080032422 for (uint32_t n = 1; n <= 4; n++) {
32423 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070032424 GemmMicrokernelTester()
32425 .extended_weights(true)
32426 .mr(1)
32427 .nr(4)
32428 .kr(8)
32429 .sr(1)
32430 .m(m)
32431 .n(n)
32432 .k(8)
32433 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032434 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032435 }
32436 }
32437 }
32438
32439 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__SSSE3, k_eq_8_subtile_m) {
32440 TEST_REQUIRES_X86_SSSE3;
32441 for (uint32_t m = 1; m <= 1; m++) {
32442 GemmMicrokernelTester()
32443 .extended_weights(true)
32444 .mr(1)
32445 .nr(4)
32446 .kr(8)
32447 .sr(1)
32448 .m(m)
32449 .n(4)
32450 .k(8)
32451 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032452 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032453 }
32454 }
32455
32456 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__SSSE3, k_eq_8_subtile_n) {
32457 TEST_REQUIRES_X86_SSSE3;
32458 for (uint32_t n = 1; n <= 4; n++) {
32459 GemmMicrokernelTester()
32460 .extended_weights(true)
32461 .mr(1)
32462 .nr(4)
32463 .kr(8)
32464 .sr(1)
32465 .m(1)
32466 .n(n)
32467 .k(8)
32468 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032469 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032470 }
32471 }
32472
32473 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__SSSE3, k_lt_8) {
32474 TEST_REQUIRES_X86_SSSE3;
32475 for (size_t k = 1; k < 8; k++) {
32476 GemmMicrokernelTester()
32477 .extended_weights(true)
32478 .mr(1)
32479 .nr(4)
32480 .kr(8)
32481 .sr(1)
32482 .m(1)
32483 .n(4)
32484 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032485 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032486 }
32487 }
32488
32489 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__SSSE3, k_lt_8_strided_a) {
32490 TEST_REQUIRES_X86_SSSE3;
32491 for (size_t k = 1; k < 8; k++) {
32492 GemmMicrokernelTester()
32493 .extended_weights(true)
32494 .mr(1)
32495 .nr(4)
32496 .kr(8)
32497 .sr(1)
32498 .m(1)
32499 .n(4)
32500 .k(k)
32501 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080032502 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032503 }
32504 }
32505
32506 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__SSSE3, k_lt_8_subtile) {
32507 TEST_REQUIRES_X86_SSSE3;
32508 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032509 for (uint32_t n = 1; n <= 4; n++) {
32510 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070032511 GemmMicrokernelTester()
32512 .extended_weights(true)
32513 .mr(1)
32514 .nr(4)
32515 .kr(8)
32516 .sr(1)
32517 .m(m)
32518 .n(n)
32519 .k(k)
32520 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032521 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032522 }
32523 }
32524 }
32525 }
32526
32527 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__SSSE3, k_gt_8) {
32528 TEST_REQUIRES_X86_SSSE3;
32529 for (size_t k = 9; k < 16; k++) {
32530 GemmMicrokernelTester()
32531 .extended_weights(true)
32532 .mr(1)
32533 .nr(4)
32534 .kr(8)
32535 .sr(1)
32536 .m(1)
32537 .n(4)
32538 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032539 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032540 }
32541 }
32542
32543 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__SSSE3, k_gt_8_strided_a) {
32544 TEST_REQUIRES_X86_SSSE3;
32545 for (size_t k = 9; k < 16; k++) {
32546 GemmMicrokernelTester()
32547 .extended_weights(true)
32548 .mr(1)
32549 .nr(4)
32550 .kr(8)
32551 .sr(1)
32552 .m(1)
32553 .n(4)
32554 .k(k)
32555 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080032556 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032557 }
32558 }
32559
32560 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__SSSE3, k_gt_8_subtile) {
32561 TEST_REQUIRES_X86_SSSE3;
32562 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032563 for (uint32_t n = 1; n <= 4; n++) {
32564 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070032565 GemmMicrokernelTester()
32566 .extended_weights(true)
32567 .mr(1)
32568 .nr(4)
32569 .kr(8)
32570 .sr(1)
32571 .m(m)
32572 .n(n)
32573 .k(k)
32574 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032575 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032576 }
32577 }
32578 }
32579 }
32580
32581 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__SSSE3, k_div_8) {
32582 TEST_REQUIRES_X86_SSSE3;
32583 for (size_t k = 16; k <= 80; k += 8) {
32584 GemmMicrokernelTester()
32585 .extended_weights(true)
32586 .mr(1)
32587 .nr(4)
32588 .kr(8)
32589 .sr(1)
32590 .m(1)
32591 .n(4)
32592 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032593 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032594 }
32595 }
32596
32597 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__SSSE3, k_div_8_strided_a) {
32598 TEST_REQUIRES_X86_SSSE3;
32599 for (size_t k = 16; k <= 80; k += 8) {
32600 GemmMicrokernelTester()
32601 .extended_weights(true)
32602 .mr(1)
32603 .nr(4)
32604 .kr(8)
32605 .sr(1)
32606 .m(1)
32607 .n(4)
32608 .k(k)
32609 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080032610 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032611 }
32612 }
32613
32614 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__SSSE3, k_div_8_subtile) {
32615 TEST_REQUIRES_X86_SSSE3;
32616 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032617 for (uint32_t n = 1; n <= 4; n++) {
32618 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070032619 GemmMicrokernelTester()
32620 .extended_weights(true)
32621 .mr(1)
32622 .nr(4)
32623 .kr(8)
32624 .sr(1)
32625 .m(m)
32626 .n(n)
32627 .k(k)
32628 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032629 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032630 }
32631 }
32632 }
32633 }
32634
32635 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__SSSE3, n_gt_4) {
32636 TEST_REQUIRES_X86_SSSE3;
32637 for (uint32_t n = 5; n < 8; n++) {
32638 for (size_t k = 1; k <= 40; k += 9) {
32639 GemmMicrokernelTester()
32640 .extended_weights(true)
32641 .mr(1)
32642 .nr(4)
32643 .kr(8)
32644 .sr(1)
32645 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032646 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070032647 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032648 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032649 }
32650 }
32651 }
32652
32653 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__SSSE3, n_gt_4_strided_cn) {
32654 TEST_REQUIRES_X86_SSSE3;
32655 for (uint32_t n = 5; n < 8; n++) {
32656 for (size_t k = 1; k <= 40; k += 9) {
32657 GemmMicrokernelTester()
32658 .extended_weights(true)
32659 .mr(1)
32660 .nr(4)
32661 .kr(8)
32662 .sr(1)
32663 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032664 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070032665 .k(k)
32666 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080032667 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032668 }
32669 }
32670 }
32671
32672 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__SSSE3, n_gt_4_strided_a) {
32673 TEST_REQUIRES_X86_SSSE3;
32674 for (uint32_t n = 5; n < 8; n++) {
32675 for (size_t k = 1; k <= 40; k += 9) {
32676 GemmMicrokernelTester()
32677 .extended_weights(true)
32678 .mr(1)
32679 .nr(4)
32680 .kr(8)
32681 .sr(1)
32682 .m(1)
32683 .n(n)
32684 .k(k)
32685 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080032686 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032687 }
32688 }
32689 }
32690
32691 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__SSSE3, n_gt_4_subtile) {
32692 TEST_REQUIRES_X86_SSSE3;
32693 for (uint32_t n = 5; n < 8; n++) {
32694 for (size_t k = 1; k <= 40; k += 9) {
32695 for (uint32_t m = 1; m <= 1; m++) {
32696 GemmMicrokernelTester()
32697 .extended_weights(true)
32698 .mr(1)
32699 .nr(4)
32700 .kr(8)
32701 .sr(1)
32702 .m(m)
32703 .n(n)
32704 .k(k)
32705 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032706 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032707 }
32708 }
32709 }
32710 }
32711
32712 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__SSSE3, n_div_4) {
32713 TEST_REQUIRES_X86_SSSE3;
32714 for (uint32_t n = 8; n <= 12; n += 4) {
32715 for (size_t k = 1; k <= 40; k += 9) {
32716 GemmMicrokernelTester()
32717 .extended_weights(true)
32718 .mr(1)
32719 .nr(4)
32720 .kr(8)
32721 .sr(1)
32722 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032723 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070032724 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032725 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032726 }
32727 }
32728 }
32729
32730 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__SSSE3, n_div_4_strided_cn) {
32731 TEST_REQUIRES_X86_SSSE3;
32732 for (uint32_t n = 8; n <= 12; n += 4) {
32733 for (size_t k = 1; k <= 40; k += 9) {
32734 GemmMicrokernelTester()
32735 .extended_weights(true)
32736 .mr(1)
32737 .nr(4)
32738 .kr(8)
32739 .sr(1)
32740 .m(1)
32741 .n(n)
32742 .k(k)
32743 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080032744 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032745 }
32746 }
32747 }
32748
32749 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__SSSE3, n_div_4_strided_a) {
32750 TEST_REQUIRES_X86_SSSE3;
32751 for (uint32_t n = 8; n <= 12; n += 4) {
32752 for (size_t k = 1; k <= 40; k += 9) {
32753 GemmMicrokernelTester()
32754 .extended_weights(true)
32755 .mr(1)
32756 .nr(4)
32757 .kr(8)
32758 .sr(1)
32759 .m(1)
32760 .n(n)
32761 .k(k)
32762 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080032763 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032764 }
32765 }
32766 }
32767
32768 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__SSSE3, n_div_4_subtile) {
32769 TEST_REQUIRES_X86_SSSE3;
32770 for (uint32_t n = 8; n <= 12; n += 4) {
32771 for (size_t k = 1; k <= 40; k += 9) {
32772 for (uint32_t m = 1; m <= 1; m++) {
32773 GemmMicrokernelTester()
32774 .extended_weights(true)
32775 .mr(1)
32776 .nr(4)
32777 .kr(8)
32778 .sr(1)
32779 .m(m)
32780 .n(n)
32781 .k(k)
32782 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032783 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032784 }
32785 }
32786 }
32787 }
32788
32789 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__SSSE3, strided_cm_subtile) {
32790 TEST_REQUIRES_X86_SSSE3;
32791 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032792 for (uint32_t n = 1; n <= 4; n++) {
32793 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070032794 GemmMicrokernelTester()
32795 .extended_weights(true)
32796 .mr(1)
32797 .nr(4)
32798 .kr(8)
32799 .sr(1)
32800 .m(m)
32801 .n(n)
32802 .k(k)
32803 .cm_stride(7)
32804 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032805 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032806 }
32807 }
32808 }
32809 }
32810
32811 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__SSSE3, strided_cm) {
32812 TEST_REQUIRES_X86_SSSE3;
32813 GemmMicrokernelTester()
32814 .extended_weights(true)
32815 .mr(1)
32816 .nr(4)
32817 .kr(8)
32818 .sr(1)
32819 .m(1)
32820 .n(4)
32821 .k(8)
32822 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080032823 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032824 }
32825#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
32826
32827
32828#if XNN_ARCH_X86 || XNN_ARCH_X86_64
32829 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSSE3, k_eq_8) {
32830 TEST_REQUIRES_X86_SSSE3;
32831 GemmMicrokernelTester()
32832 .extended_weights(true)
32833 .mr(2)
32834 .nr(4)
32835 .kr(8)
32836 .sr(1)
32837 .m(2)
32838 .n(4)
32839 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080032840 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032841 }
32842
32843 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSSE3, strided_cn) {
32844 TEST_REQUIRES_X86_SSSE3;
32845 GemmMicrokernelTester()
32846 .extended_weights(true)
32847 .mr(2)
32848 .nr(4)
32849 .kr(8)
32850 .sr(1)
32851 .m(2)
32852 .n(4)
32853 .k(8)
32854 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080032855 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032856 }
32857
32858 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSSE3, k_eq_8_strided_a) {
32859 TEST_REQUIRES_X86_SSSE3;
32860 GemmMicrokernelTester()
32861 .extended_weights(true)
32862 .mr(2)
32863 .nr(4)
32864 .kr(8)
32865 .sr(1)
32866 .m(2)
32867 .n(4)
32868 .k(8)
32869 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080032870 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032871 }
32872
32873 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSSE3, k_eq_8_subtile) {
32874 TEST_REQUIRES_X86_SSSE3;
Zhi An Ng83844ae2022-01-14 09:52:25 -080032875 for (uint32_t n = 1; n <= 4; n++) {
32876 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070032877 GemmMicrokernelTester()
32878 .extended_weights(true)
32879 .mr(2)
32880 .nr(4)
32881 .kr(8)
32882 .sr(1)
32883 .m(m)
32884 .n(n)
32885 .k(8)
32886 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032887 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032888 }
32889 }
32890 }
32891
32892 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSSE3, k_eq_8_subtile_m) {
32893 TEST_REQUIRES_X86_SSSE3;
32894 for (uint32_t m = 1; m <= 2; m++) {
32895 GemmMicrokernelTester()
32896 .extended_weights(true)
32897 .mr(2)
32898 .nr(4)
32899 .kr(8)
32900 .sr(1)
32901 .m(m)
32902 .n(4)
32903 .k(8)
32904 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032905 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032906 }
32907 }
32908
32909 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSSE3, k_eq_8_subtile_n) {
32910 TEST_REQUIRES_X86_SSSE3;
32911 for (uint32_t n = 1; n <= 4; n++) {
32912 GemmMicrokernelTester()
32913 .extended_weights(true)
32914 .mr(2)
32915 .nr(4)
32916 .kr(8)
32917 .sr(1)
32918 .m(2)
32919 .n(n)
32920 .k(8)
32921 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032922 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032923 }
32924 }
32925
32926 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSSE3, k_lt_8) {
32927 TEST_REQUIRES_X86_SSSE3;
32928 for (size_t k = 1; k < 8; k++) {
32929 GemmMicrokernelTester()
32930 .extended_weights(true)
32931 .mr(2)
32932 .nr(4)
32933 .kr(8)
32934 .sr(1)
32935 .m(2)
32936 .n(4)
32937 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032938 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032939 }
32940 }
32941
32942 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSSE3, k_lt_8_strided_a) {
32943 TEST_REQUIRES_X86_SSSE3;
32944 for (size_t k = 1; k < 8; k++) {
32945 GemmMicrokernelTester()
32946 .extended_weights(true)
32947 .mr(2)
32948 .nr(4)
32949 .kr(8)
32950 .sr(1)
32951 .m(2)
32952 .n(4)
32953 .k(k)
32954 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080032955 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032956 }
32957 }
32958
32959 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSSE3, k_lt_8_subtile) {
32960 TEST_REQUIRES_X86_SSSE3;
32961 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032962 for (uint32_t n = 1; n <= 4; n++) {
32963 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070032964 GemmMicrokernelTester()
32965 .extended_weights(true)
32966 .mr(2)
32967 .nr(4)
32968 .kr(8)
32969 .sr(1)
32970 .m(m)
32971 .n(n)
32972 .k(k)
32973 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032974 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032975 }
32976 }
32977 }
32978 }
32979
32980 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSSE3, k_gt_8) {
32981 TEST_REQUIRES_X86_SSSE3;
32982 for (size_t k = 9; k < 16; k++) {
32983 GemmMicrokernelTester()
32984 .extended_weights(true)
32985 .mr(2)
32986 .nr(4)
32987 .kr(8)
32988 .sr(1)
32989 .m(2)
32990 .n(4)
32991 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032992 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070032993 }
32994 }
32995
32996 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSSE3, k_gt_8_strided_a) {
32997 TEST_REQUIRES_X86_SSSE3;
32998 for (size_t k = 9; k < 16; k++) {
32999 GemmMicrokernelTester()
33000 .extended_weights(true)
33001 .mr(2)
33002 .nr(4)
33003 .kr(8)
33004 .sr(1)
33005 .m(2)
33006 .n(4)
33007 .k(k)
33008 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080033009 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033010 }
33011 }
33012
33013 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSSE3, k_gt_8_subtile) {
33014 TEST_REQUIRES_X86_SSSE3;
33015 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033016 for (uint32_t n = 1; n <= 4; n++) {
33017 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070033018 GemmMicrokernelTester()
33019 .extended_weights(true)
33020 .mr(2)
33021 .nr(4)
33022 .kr(8)
33023 .sr(1)
33024 .m(m)
33025 .n(n)
33026 .k(k)
33027 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033028 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033029 }
33030 }
33031 }
33032 }
33033
33034 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSSE3, k_div_8) {
33035 TEST_REQUIRES_X86_SSSE3;
33036 for (size_t k = 16; k <= 80; k += 8) {
33037 GemmMicrokernelTester()
33038 .extended_weights(true)
33039 .mr(2)
33040 .nr(4)
33041 .kr(8)
33042 .sr(1)
33043 .m(2)
33044 .n(4)
33045 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033046 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033047 }
33048 }
33049
33050 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSSE3, k_div_8_strided_a) {
33051 TEST_REQUIRES_X86_SSSE3;
33052 for (size_t k = 16; k <= 80; k += 8) {
33053 GemmMicrokernelTester()
33054 .extended_weights(true)
33055 .mr(2)
33056 .nr(4)
33057 .kr(8)
33058 .sr(1)
33059 .m(2)
33060 .n(4)
33061 .k(k)
33062 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080033063 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033064 }
33065 }
33066
33067 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSSE3, k_div_8_subtile) {
33068 TEST_REQUIRES_X86_SSSE3;
33069 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033070 for (uint32_t n = 1; n <= 4; n++) {
33071 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070033072 GemmMicrokernelTester()
33073 .extended_weights(true)
33074 .mr(2)
33075 .nr(4)
33076 .kr(8)
33077 .sr(1)
33078 .m(m)
33079 .n(n)
33080 .k(k)
33081 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033082 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033083 }
33084 }
33085 }
33086 }
33087
33088 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSSE3, n_gt_4) {
33089 TEST_REQUIRES_X86_SSSE3;
33090 for (uint32_t n = 5; n < 8; n++) {
33091 for (size_t k = 1; k <= 40; k += 9) {
33092 GemmMicrokernelTester()
33093 .extended_weights(true)
33094 .mr(2)
33095 .nr(4)
33096 .kr(8)
33097 .sr(1)
33098 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033099 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070033100 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033101 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033102 }
33103 }
33104 }
33105
33106 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSSE3, n_gt_4_strided_cn) {
33107 TEST_REQUIRES_X86_SSSE3;
33108 for (uint32_t n = 5; n < 8; n++) {
33109 for (size_t k = 1; k <= 40; k += 9) {
33110 GemmMicrokernelTester()
33111 .extended_weights(true)
33112 .mr(2)
33113 .nr(4)
33114 .kr(8)
33115 .sr(1)
33116 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033117 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070033118 .k(k)
33119 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080033120 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033121 }
33122 }
33123 }
33124
33125 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSSE3, n_gt_4_strided_a) {
33126 TEST_REQUIRES_X86_SSSE3;
33127 for (uint32_t n = 5; n < 8; n++) {
33128 for (size_t k = 1; k <= 40; k += 9) {
33129 GemmMicrokernelTester()
33130 .extended_weights(true)
33131 .mr(2)
33132 .nr(4)
33133 .kr(8)
33134 .sr(1)
33135 .m(2)
33136 .n(n)
33137 .k(k)
33138 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080033139 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033140 }
33141 }
33142 }
33143
33144 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSSE3, n_gt_4_subtile) {
33145 TEST_REQUIRES_X86_SSSE3;
33146 for (uint32_t n = 5; n < 8; n++) {
33147 for (size_t k = 1; k <= 40; k += 9) {
33148 for (uint32_t m = 1; m <= 2; m++) {
33149 GemmMicrokernelTester()
33150 .extended_weights(true)
33151 .mr(2)
33152 .nr(4)
33153 .kr(8)
33154 .sr(1)
33155 .m(m)
33156 .n(n)
33157 .k(k)
33158 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033159 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033160 }
33161 }
33162 }
33163 }
33164
33165 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSSE3, n_div_4) {
33166 TEST_REQUIRES_X86_SSSE3;
33167 for (uint32_t n = 8; n <= 12; n += 4) {
33168 for (size_t k = 1; k <= 40; k += 9) {
33169 GemmMicrokernelTester()
33170 .extended_weights(true)
33171 .mr(2)
33172 .nr(4)
33173 .kr(8)
33174 .sr(1)
33175 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033176 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070033177 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033178 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033179 }
33180 }
33181 }
33182
33183 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSSE3, n_div_4_strided_cn) {
33184 TEST_REQUIRES_X86_SSSE3;
33185 for (uint32_t n = 8; n <= 12; n += 4) {
33186 for (size_t k = 1; k <= 40; k += 9) {
33187 GemmMicrokernelTester()
33188 .extended_weights(true)
33189 .mr(2)
33190 .nr(4)
33191 .kr(8)
33192 .sr(1)
33193 .m(2)
33194 .n(n)
33195 .k(k)
33196 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080033197 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033198 }
33199 }
33200 }
33201
33202 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSSE3, n_div_4_strided_a) {
33203 TEST_REQUIRES_X86_SSSE3;
33204 for (uint32_t n = 8; n <= 12; n += 4) {
33205 for (size_t k = 1; k <= 40; k += 9) {
33206 GemmMicrokernelTester()
33207 .extended_weights(true)
33208 .mr(2)
33209 .nr(4)
33210 .kr(8)
33211 .sr(1)
33212 .m(2)
33213 .n(n)
33214 .k(k)
33215 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080033216 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033217 }
33218 }
33219 }
33220
33221 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSSE3, n_div_4_subtile) {
33222 TEST_REQUIRES_X86_SSSE3;
33223 for (uint32_t n = 8; n <= 12; n += 4) {
33224 for (size_t k = 1; k <= 40; k += 9) {
33225 for (uint32_t m = 1; m <= 2; m++) {
33226 GemmMicrokernelTester()
33227 .extended_weights(true)
33228 .mr(2)
33229 .nr(4)
33230 .kr(8)
33231 .sr(1)
33232 .m(m)
33233 .n(n)
33234 .k(k)
33235 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033236 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033237 }
33238 }
33239 }
33240 }
33241
33242 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSSE3, strided_cm_subtile) {
33243 TEST_REQUIRES_X86_SSSE3;
33244 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033245 for (uint32_t n = 1; n <= 4; n++) {
33246 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070033247 GemmMicrokernelTester()
33248 .extended_weights(true)
33249 .mr(2)
33250 .nr(4)
33251 .kr(8)
33252 .sr(1)
33253 .m(m)
33254 .n(n)
33255 .k(k)
33256 .cm_stride(7)
33257 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033258 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033259 }
33260 }
33261 }
33262 }
33263
33264 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__SSSE3, strided_cm) {
33265 TEST_REQUIRES_X86_SSSE3;
33266 GemmMicrokernelTester()
33267 .extended_weights(true)
33268 .mr(2)
33269 .nr(4)
33270 .kr(8)
33271 .sr(1)
33272 .m(2)
33273 .n(4)
33274 .k(8)
33275 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080033276 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033277 }
33278#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
33279
33280
33281#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan0ff79892021-08-06 16:05:06 -070033282 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE41, k_eq_8) {
33283 TEST_REQUIRES_X86_SSE41;
33284 GemmMicrokernelTester()
33285 .extended_weights(true)
33286 .mr(3)
33287 .nr(4)
33288 .kr(8)
33289 .sr(1)
33290 .m(3)
33291 .n(4)
33292 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080033293 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033294 }
33295
33296 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE41, strided_cn) {
33297 TEST_REQUIRES_X86_SSE41;
33298 GemmMicrokernelTester()
33299 .extended_weights(true)
33300 .mr(3)
33301 .nr(4)
33302 .kr(8)
33303 .sr(1)
33304 .m(3)
33305 .n(4)
33306 .k(8)
33307 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080033308 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033309 }
33310
33311 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE41, k_eq_8_strided_a) {
33312 TEST_REQUIRES_X86_SSE41;
33313 GemmMicrokernelTester()
33314 .extended_weights(true)
33315 .mr(3)
33316 .nr(4)
33317 .kr(8)
33318 .sr(1)
33319 .m(3)
33320 .n(4)
33321 .k(8)
33322 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080033323 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033324 }
33325
33326 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE41, k_eq_8_subtile) {
33327 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080033328 for (uint32_t n = 1; n <= 4; n++) {
33329 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070033330 GemmMicrokernelTester()
33331 .extended_weights(true)
33332 .mr(3)
33333 .nr(4)
33334 .kr(8)
33335 .sr(1)
33336 .m(m)
33337 .n(n)
33338 .k(8)
33339 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033340 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033341 }
33342 }
33343 }
33344
33345 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE41, k_eq_8_subtile_m) {
33346 TEST_REQUIRES_X86_SSE41;
33347 for (uint32_t m = 1; m <= 3; m++) {
33348 GemmMicrokernelTester()
33349 .extended_weights(true)
33350 .mr(3)
33351 .nr(4)
33352 .kr(8)
33353 .sr(1)
33354 .m(m)
33355 .n(4)
33356 .k(8)
33357 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033358 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033359 }
33360 }
33361
33362 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE41, k_eq_8_subtile_n) {
33363 TEST_REQUIRES_X86_SSE41;
33364 for (uint32_t n = 1; n <= 4; n++) {
33365 GemmMicrokernelTester()
33366 .extended_weights(true)
33367 .mr(3)
33368 .nr(4)
33369 .kr(8)
33370 .sr(1)
33371 .m(3)
33372 .n(n)
33373 .k(8)
33374 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033375 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033376 }
33377 }
33378
33379 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE41, k_lt_8) {
33380 TEST_REQUIRES_X86_SSE41;
33381 for (size_t k = 1; k < 8; k++) {
33382 GemmMicrokernelTester()
33383 .extended_weights(true)
33384 .mr(3)
33385 .nr(4)
33386 .kr(8)
33387 .sr(1)
33388 .m(3)
33389 .n(4)
33390 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033391 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033392 }
33393 }
33394
33395 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE41, k_lt_8_strided_a) {
33396 TEST_REQUIRES_X86_SSE41;
33397 for (size_t k = 1; k < 8; k++) {
33398 GemmMicrokernelTester()
33399 .extended_weights(true)
33400 .mr(3)
33401 .nr(4)
33402 .kr(8)
33403 .sr(1)
33404 .m(3)
33405 .n(4)
33406 .k(k)
33407 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080033408 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033409 }
33410 }
33411
33412 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE41, k_lt_8_subtile) {
33413 TEST_REQUIRES_X86_SSE41;
33414 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033415 for (uint32_t n = 1; n <= 4; n++) {
33416 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070033417 GemmMicrokernelTester()
33418 .extended_weights(true)
33419 .mr(3)
33420 .nr(4)
33421 .kr(8)
33422 .sr(1)
33423 .m(m)
33424 .n(n)
33425 .k(k)
33426 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033427 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033428 }
33429 }
33430 }
33431 }
33432
33433 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE41, k_gt_8) {
33434 TEST_REQUIRES_X86_SSE41;
33435 for (size_t k = 9; k < 16; k++) {
33436 GemmMicrokernelTester()
33437 .extended_weights(true)
33438 .mr(3)
33439 .nr(4)
33440 .kr(8)
33441 .sr(1)
33442 .m(3)
33443 .n(4)
33444 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033445 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033446 }
33447 }
33448
33449 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE41, k_gt_8_strided_a) {
33450 TEST_REQUIRES_X86_SSE41;
33451 for (size_t k = 9; k < 16; k++) {
33452 GemmMicrokernelTester()
33453 .extended_weights(true)
33454 .mr(3)
33455 .nr(4)
33456 .kr(8)
33457 .sr(1)
33458 .m(3)
33459 .n(4)
33460 .k(k)
33461 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080033462 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033463 }
33464 }
33465
33466 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE41, k_gt_8_subtile) {
33467 TEST_REQUIRES_X86_SSE41;
33468 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033469 for (uint32_t n = 1; n <= 4; n++) {
33470 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070033471 GemmMicrokernelTester()
33472 .extended_weights(true)
33473 .mr(3)
33474 .nr(4)
33475 .kr(8)
33476 .sr(1)
33477 .m(m)
33478 .n(n)
33479 .k(k)
33480 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033481 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033482 }
33483 }
33484 }
33485 }
33486
33487 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE41, k_div_8) {
33488 TEST_REQUIRES_X86_SSE41;
33489 for (size_t k = 16; k <= 80; k += 8) {
33490 GemmMicrokernelTester()
33491 .extended_weights(true)
33492 .mr(3)
33493 .nr(4)
33494 .kr(8)
33495 .sr(1)
33496 .m(3)
33497 .n(4)
33498 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033499 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033500 }
33501 }
33502
33503 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE41, k_div_8_strided_a) {
33504 TEST_REQUIRES_X86_SSE41;
33505 for (size_t k = 16; k <= 80; k += 8) {
33506 GemmMicrokernelTester()
33507 .extended_weights(true)
33508 .mr(3)
33509 .nr(4)
33510 .kr(8)
33511 .sr(1)
33512 .m(3)
33513 .n(4)
33514 .k(k)
33515 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080033516 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033517 }
33518 }
33519
33520 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE41, k_div_8_subtile) {
33521 TEST_REQUIRES_X86_SSE41;
33522 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033523 for (uint32_t n = 1; n <= 4; n++) {
33524 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070033525 GemmMicrokernelTester()
33526 .extended_weights(true)
33527 .mr(3)
33528 .nr(4)
33529 .kr(8)
33530 .sr(1)
33531 .m(m)
33532 .n(n)
33533 .k(k)
33534 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033535 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033536 }
33537 }
33538 }
33539 }
33540
33541 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE41, n_gt_4) {
33542 TEST_REQUIRES_X86_SSE41;
33543 for (uint32_t n = 5; n < 8; n++) {
33544 for (size_t k = 1; k <= 40; k += 9) {
33545 GemmMicrokernelTester()
33546 .extended_weights(true)
33547 .mr(3)
33548 .nr(4)
33549 .kr(8)
33550 .sr(1)
33551 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033552 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070033553 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033554 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033555 }
33556 }
33557 }
33558
33559 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE41, n_gt_4_strided_cn) {
33560 TEST_REQUIRES_X86_SSE41;
33561 for (uint32_t n = 5; n < 8; n++) {
33562 for (size_t k = 1; k <= 40; k += 9) {
33563 GemmMicrokernelTester()
33564 .extended_weights(true)
33565 .mr(3)
33566 .nr(4)
33567 .kr(8)
33568 .sr(1)
33569 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033570 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070033571 .k(k)
33572 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080033573 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033574 }
33575 }
33576 }
33577
33578 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE41, n_gt_4_strided_a) {
33579 TEST_REQUIRES_X86_SSE41;
33580 for (uint32_t n = 5; n < 8; n++) {
33581 for (size_t k = 1; k <= 40; k += 9) {
33582 GemmMicrokernelTester()
33583 .extended_weights(true)
33584 .mr(3)
33585 .nr(4)
33586 .kr(8)
33587 .sr(1)
33588 .m(3)
33589 .n(n)
33590 .k(k)
33591 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080033592 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033593 }
33594 }
33595 }
33596
33597 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE41, n_gt_4_subtile) {
33598 TEST_REQUIRES_X86_SSE41;
33599 for (uint32_t n = 5; n < 8; n++) {
33600 for (size_t k = 1; k <= 40; k += 9) {
33601 for (uint32_t m = 1; m <= 3; m++) {
33602 GemmMicrokernelTester()
33603 .extended_weights(true)
33604 .mr(3)
33605 .nr(4)
33606 .kr(8)
33607 .sr(1)
33608 .m(m)
33609 .n(n)
33610 .k(k)
33611 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033612 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033613 }
33614 }
33615 }
33616 }
33617
33618 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE41, n_div_4) {
33619 TEST_REQUIRES_X86_SSE41;
33620 for (uint32_t n = 8; n <= 12; n += 4) {
33621 for (size_t k = 1; k <= 40; k += 9) {
33622 GemmMicrokernelTester()
33623 .extended_weights(true)
33624 .mr(3)
33625 .nr(4)
33626 .kr(8)
33627 .sr(1)
33628 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033629 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070033630 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033631 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033632 }
33633 }
33634 }
33635
33636 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE41, n_div_4_strided_cn) {
33637 TEST_REQUIRES_X86_SSE41;
33638 for (uint32_t n = 8; n <= 12; n += 4) {
33639 for (size_t k = 1; k <= 40; k += 9) {
33640 GemmMicrokernelTester()
33641 .extended_weights(true)
33642 .mr(3)
33643 .nr(4)
33644 .kr(8)
33645 .sr(1)
33646 .m(3)
33647 .n(n)
33648 .k(k)
33649 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080033650 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033651 }
33652 }
33653 }
33654
33655 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE41, n_div_4_strided_a) {
33656 TEST_REQUIRES_X86_SSE41;
33657 for (uint32_t n = 8; n <= 12; n += 4) {
33658 for (size_t k = 1; k <= 40; k += 9) {
33659 GemmMicrokernelTester()
33660 .extended_weights(true)
33661 .mr(3)
33662 .nr(4)
33663 .kr(8)
33664 .sr(1)
33665 .m(3)
33666 .n(n)
33667 .k(k)
33668 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080033669 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033670 }
33671 }
33672 }
33673
33674 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE41, n_div_4_subtile) {
33675 TEST_REQUIRES_X86_SSE41;
33676 for (uint32_t n = 8; n <= 12; n += 4) {
33677 for (size_t k = 1; k <= 40; k += 9) {
33678 for (uint32_t m = 1; m <= 3; m++) {
33679 GemmMicrokernelTester()
33680 .extended_weights(true)
33681 .mr(3)
33682 .nr(4)
33683 .kr(8)
33684 .sr(1)
33685 .m(m)
33686 .n(n)
33687 .k(k)
33688 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033689 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033690 }
33691 }
33692 }
33693 }
33694
33695 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE41, strided_cm_subtile) {
33696 TEST_REQUIRES_X86_SSE41;
33697 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033698 for (uint32_t n = 1; n <= 4; n++) {
33699 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070033700 GemmMicrokernelTester()
33701 .extended_weights(true)
33702 .mr(3)
33703 .nr(4)
33704 .kr(8)
33705 .sr(1)
33706 .m(m)
33707 .n(n)
33708 .k(k)
33709 .cm_stride(7)
33710 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033711 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033712 }
33713 }
33714 }
33715 }
33716
33717 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__SSE41, strided_cm) {
33718 TEST_REQUIRES_X86_SSE41;
33719 GemmMicrokernelTester()
33720 .extended_weights(true)
33721 .mr(3)
33722 .nr(4)
33723 .kr(8)
33724 .sr(1)
33725 .m(3)
33726 .n(4)
33727 .k(8)
33728 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080033729 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033730 }
33731#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
33732
33733
33734#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan0ff79892021-08-06 16:05:06 -070033735 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__AVX, k_eq_8) {
33736 TEST_REQUIRES_X86_AVX;
33737 GemmMicrokernelTester()
33738 .extended_weights(true)
33739 .mr(2)
33740 .nr(4)
33741 .kr(8)
33742 .sr(1)
33743 .m(2)
33744 .n(4)
33745 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080033746 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033747 }
33748
33749 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__AVX, strided_cn) {
33750 TEST_REQUIRES_X86_AVX;
33751 GemmMicrokernelTester()
33752 .extended_weights(true)
33753 .mr(2)
33754 .nr(4)
33755 .kr(8)
33756 .sr(1)
33757 .m(2)
33758 .n(4)
33759 .k(8)
33760 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080033761 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033762 }
33763
33764 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__AVX, k_eq_8_strided_a) {
33765 TEST_REQUIRES_X86_AVX;
33766 GemmMicrokernelTester()
33767 .extended_weights(true)
33768 .mr(2)
33769 .nr(4)
33770 .kr(8)
33771 .sr(1)
33772 .m(2)
33773 .n(4)
33774 .k(8)
33775 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080033776 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033777 }
33778
33779 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__AVX, k_eq_8_subtile) {
33780 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080033781 for (uint32_t n = 1; n <= 4; n++) {
33782 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070033783 GemmMicrokernelTester()
33784 .extended_weights(true)
33785 .mr(2)
33786 .nr(4)
33787 .kr(8)
33788 .sr(1)
33789 .m(m)
33790 .n(n)
33791 .k(8)
33792 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033793 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033794 }
33795 }
33796 }
33797
33798 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__AVX, k_eq_8_subtile_m) {
33799 TEST_REQUIRES_X86_AVX;
33800 for (uint32_t m = 1; m <= 2; m++) {
33801 GemmMicrokernelTester()
33802 .extended_weights(true)
33803 .mr(2)
33804 .nr(4)
33805 .kr(8)
33806 .sr(1)
33807 .m(m)
33808 .n(4)
33809 .k(8)
33810 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033811 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033812 }
33813 }
33814
33815 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__AVX, k_eq_8_subtile_n) {
33816 TEST_REQUIRES_X86_AVX;
33817 for (uint32_t n = 1; n <= 4; n++) {
33818 GemmMicrokernelTester()
33819 .extended_weights(true)
33820 .mr(2)
33821 .nr(4)
33822 .kr(8)
33823 .sr(1)
33824 .m(2)
33825 .n(n)
33826 .k(8)
33827 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033828 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033829 }
33830 }
33831
33832 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__AVX, k_lt_8) {
33833 TEST_REQUIRES_X86_AVX;
33834 for (size_t k = 1; k < 8; k++) {
33835 GemmMicrokernelTester()
33836 .extended_weights(true)
33837 .mr(2)
33838 .nr(4)
33839 .kr(8)
33840 .sr(1)
33841 .m(2)
33842 .n(4)
33843 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033844 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033845 }
33846 }
33847
33848 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__AVX, k_lt_8_strided_a) {
33849 TEST_REQUIRES_X86_AVX;
33850 for (size_t k = 1; k < 8; k++) {
33851 GemmMicrokernelTester()
33852 .extended_weights(true)
33853 .mr(2)
33854 .nr(4)
33855 .kr(8)
33856 .sr(1)
33857 .m(2)
33858 .n(4)
33859 .k(k)
33860 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080033861 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033862 }
33863 }
33864
33865 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__AVX, k_lt_8_subtile) {
33866 TEST_REQUIRES_X86_AVX;
33867 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033868 for (uint32_t n = 1; n <= 4; n++) {
33869 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070033870 GemmMicrokernelTester()
33871 .extended_weights(true)
33872 .mr(2)
33873 .nr(4)
33874 .kr(8)
33875 .sr(1)
33876 .m(m)
33877 .n(n)
33878 .k(k)
33879 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033880 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033881 }
33882 }
33883 }
33884 }
33885
33886 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__AVX, k_gt_8) {
33887 TEST_REQUIRES_X86_AVX;
33888 for (size_t k = 9; k < 16; k++) {
33889 GemmMicrokernelTester()
33890 .extended_weights(true)
33891 .mr(2)
33892 .nr(4)
33893 .kr(8)
33894 .sr(1)
33895 .m(2)
33896 .n(4)
33897 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033898 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033899 }
33900 }
33901
33902 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__AVX, k_gt_8_strided_a) {
33903 TEST_REQUIRES_X86_AVX;
33904 for (size_t k = 9; k < 16; k++) {
33905 GemmMicrokernelTester()
33906 .extended_weights(true)
33907 .mr(2)
33908 .nr(4)
33909 .kr(8)
33910 .sr(1)
33911 .m(2)
33912 .n(4)
33913 .k(k)
33914 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080033915 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033916 }
33917 }
33918
33919 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__AVX, k_gt_8_subtile) {
33920 TEST_REQUIRES_X86_AVX;
33921 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033922 for (uint32_t n = 1; n <= 4; n++) {
33923 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070033924 GemmMicrokernelTester()
33925 .extended_weights(true)
33926 .mr(2)
33927 .nr(4)
33928 .kr(8)
33929 .sr(1)
33930 .m(m)
33931 .n(n)
33932 .k(k)
33933 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033934 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033935 }
33936 }
33937 }
33938 }
33939
33940 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__AVX, k_div_8) {
33941 TEST_REQUIRES_X86_AVX;
33942 for (size_t k = 16; k <= 80; k += 8) {
33943 GemmMicrokernelTester()
33944 .extended_weights(true)
33945 .mr(2)
33946 .nr(4)
33947 .kr(8)
33948 .sr(1)
33949 .m(2)
33950 .n(4)
33951 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033952 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033953 }
33954 }
33955
33956 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__AVX, k_div_8_strided_a) {
33957 TEST_REQUIRES_X86_AVX;
33958 for (size_t k = 16; k <= 80; k += 8) {
33959 GemmMicrokernelTester()
33960 .extended_weights(true)
33961 .mr(2)
33962 .nr(4)
33963 .kr(8)
33964 .sr(1)
33965 .m(2)
33966 .n(4)
33967 .k(k)
33968 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080033969 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033970 }
33971 }
33972
33973 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__AVX, k_div_8_subtile) {
33974 TEST_REQUIRES_X86_AVX;
33975 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033976 for (uint32_t n = 1; n <= 4; n++) {
33977 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070033978 GemmMicrokernelTester()
33979 .extended_weights(true)
33980 .mr(2)
33981 .nr(4)
33982 .kr(8)
33983 .sr(1)
33984 .m(m)
33985 .n(n)
33986 .k(k)
33987 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033988 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070033989 }
33990 }
33991 }
33992 }
33993
33994 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__AVX, n_gt_4) {
33995 TEST_REQUIRES_X86_AVX;
33996 for (uint32_t n = 5; n < 8; n++) {
33997 for (size_t k = 1; k <= 40; k += 9) {
33998 GemmMicrokernelTester()
33999 .extended_weights(true)
34000 .mr(2)
34001 .nr(4)
34002 .kr(8)
34003 .sr(1)
34004 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034005 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070034006 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034007 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034008 }
34009 }
34010 }
34011
34012 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__AVX, n_gt_4_strided_cn) {
34013 TEST_REQUIRES_X86_AVX;
34014 for (uint32_t n = 5; n < 8; n++) {
34015 for (size_t k = 1; k <= 40; k += 9) {
34016 GemmMicrokernelTester()
34017 .extended_weights(true)
34018 .mr(2)
34019 .nr(4)
34020 .kr(8)
34021 .sr(1)
34022 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034023 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070034024 .k(k)
34025 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080034026 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034027 }
34028 }
34029 }
34030
34031 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__AVX, n_gt_4_strided_a) {
34032 TEST_REQUIRES_X86_AVX;
34033 for (uint32_t n = 5; n < 8; n++) {
34034 for (size_t k = 1; k <= 40; k += 9) {
34035 GemmMicrokernelTester()
34036 .extended_weights(true)
34037 .mr(2)
34038 .nr(4)
34039 .kr(8)
34040 .sr(1)
34041 .m(2)
34042 .n(n)
34043 .k(k)
34044 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080034045 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034046 }
34047 }
34048 }
34049
34050 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__AVX, n_gt_4_subtile) {
34051 TEST_REQUIRES_X86_AVX;
34052 for (uint32_t n = 5; n < 8; n++) {
34053 for (size_t k = 1; k <= 40; k += 9) {
34054 for (uint32_t m = 1; m <= 2; m++) {
34055 GemmMicrokernelTester()
34056 .extended_weights(true)
34057 .mr(2)
34058 .nr(4)
34059 .kr(8)
34060 .sr(1)
34061 .m(m)
34062 .n(n)
34063 .k(k)
34064 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034065 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034066 }
34067 }
34068 }
34069 }
34070
34071 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__AVX, n_div_4) {
34072 TEST_REQUIRES_X86_AVX;
34073 for (uint32_t n = 8; n <= 12; n += 4) {
34074 for (size_t k = 1; k <= 40; k += 9) {
34075 GemmMicrokernelTester()
34076 .extended_weights(true)
34077 .mr(2)
34078 .nr(4)
34079 .kr(8)
34080 .sr(1)
34081 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034082 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070034083 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034084 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034085 }
34086 }
34087 }
34088
34089 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__AVX, n_div_4_strided_cn) {
34090 TEST_REQUIRES_X86_AVX;
34091 for (uint32_t n = 8; n <= 12; n += 4) {
34092 for (size_t k = 1; k <= 40; k += 9) {
34093 GemmMicrokernelTester()
34094 .extended_weights(true)
34095 .mr(2)
34096 .nr(4)
34097 .kr(8)
34098 .sr(1)
34099 .m(2)
34100 .n(n)
34101 .k(k)
34102 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080034103 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034104 }
34105 }
34106 }
34107
34108 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__AVX, n_div_4_strided_a) {
34109 TEST_REQUIRES_X86_AVX;
34110 for (uint32_t n = 8; n <= 12; n += 4) {
34111 for (size_t k = 1; k <= 40; k += 9) {
34112 GemmMicrokernelTester()
34113 .extended_weights(true)
34114 .mr(2)
34115 .nr(4)
34116 .kr(8)
34117 .sr(1)
34118 .m(2)
34119 .n(n)
34120 .k(k)
34121 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080034122 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034123 }
34124 }
34125 }
34126
34127 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__AVX, n_div_4_subtile) {
34128 TEST_REQUIRES_X86_AVX;
34129 for (uint32_t n = 8; n <= 12; n += 4) {
34130 for (size_t k = 1; k <= 40; k += 9) {
34131 for (uint32_t m = 1; m <= 2; m++) {
34132 GemmMicrokernelTester()
34133 .extended_weights(true)
34134 .mr(2)
34135 .nr(4)
34136 .kr(8)
34137 .sr(1)
34138 .m(m)
34139 .n(n)
34140 .k(k)
34141 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034142 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034143 }
34144 }
34145 }
34146 }
34147
34148 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__AVX, strided_cm_subtile) {
34149 TEST_REQUIRES_X86_AVX;
34150 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034151 for (uint32_t n = 1; n <= 4; n++) {
34152 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070034153 GemmMicrokernelTester()
34154 .extended_weights(true)
34155 .mr(2)
34156 .nr(4)
34157 .kr(8)
34158 .sr(1)
34159 .m(m)
34160 .n(n)
34161 .k(k)
34162 .cm_stride(7)
34163 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034164 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034165 }
34166 }
34167 }
34168 }
34169
34170 TEST(QS8_GEMM_XW_MINMAX_FP32_2X4C8__AVX, strided_cm) {
34171 TEST_REQUIRES_X86_AVX;
34172 GemmMicrokernelTester()
34173 .extended_weights(true)
34174 .mr(2)
34175 .nr(4)
34176 .kr(8)
34177 .sr(1)
34178 .m(2)
34179 .n(4)
34180 .k(8)
34181 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080034182 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034183 }
34184#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
34185
34186
34187#if XNN_ARCH_X86 || XNN_ARCH_X86_64
34188 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__AVX, k_eq_8) {
34189 TEST_REQUIRES_X86_AVX;
34190 GemmMicrokernelTester()
34191 .extended_weights(true)
34192 .mr(3)
34193 .nr(4)
34194 .kr(8)
34195 .sr(1)
34196 .m(3)
34197 .n(4)
34198 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080034199 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034200 }
34201
34202 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__AVX, strided_cn) {
34203 TEST_REQUIRES_X86_AVX;
34204 GemmMicrokernelTester()
34205 .extended_weights(true)
34206 .mr(3)
34207 .nr(4)
34208 .kr(8)
34209 .sr(1)
34210 .m(3)
34211 .n(4)
34212 .k(8)
34213 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080034214 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034215 }
34216
34217 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__AVX, k_eq_8_strided_a) {
34218 TEST_REQUIRES_X86_AVX;
34219 GemmMicrokernelTester()
34220 .extended_weights(true)
34221 .mr(3)
34222 .nr(4)
34223 .kr(8)
34224 .sr(1)
34225 .m(3)
34226 .n(4)
34227 .k(8)
34228 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080034229 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034230 }
34231
34232 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__AVX, k_eq_8_subtile) {
34233 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080034234 for (uint32_t n = 1; n <= 4; n++) {
34235 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070034236 GemmMicrokernelTester()
34237 .extended_weights(true)
34238 .mr(3)
34239 .nr(4)
34240 .kr(8)
34241 .sr(1)
34242 .m(m)
34243 .n(n)
34244 .k(8)
34245 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034246 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034247 }
34248 }
34249 }
34250
34251 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__AVX, k_eq_8_subtile_m) {
34252 TEST_REQUIRES_X86_AVX;
34253 for (uint32_t m = 1; m <= 3; m++) {
34254 GemmMicrokernelTester()
34255 .extended_weights(true)
34256 .mr(3)
34257 .nr(4)
34258 .kr(8)
34259 .sr(1)
34260 .m(m)
34261 .n(4)
34262 .k(8)
34263 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034264 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034265 }
34266 }
34267
34268 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__AVX, k_eq_8_subtile_n) {
34269 TEST_REQUIRES_X86_AVX;
34270 for (uint32_t n = 1; n <= 4; n++) {
34271 GemmMicrokernelTester()
34272 .extended_weights(true)
34273 .mr(3)
34274 .nr(4)
34275 .kr(8)
34276 .sr(1)
34277 .m(3)
34278 .n(n)
34279 .k(8)
34280 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034281 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034282 }
34283 }
34284
34285 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__AVX, k_lt_8) {
34286 TEST_REQUIRES_X86_AVX;
34287 for (size_t k = 1; k < 8; k++) {
34288 GemmMicrokernelTester()
34289 .extended_weights(true)
34290 .mr(3)
34291 .nr(4)
34292 .kr(8)
34293 .sr(1)
34294 .m(3)
34295 .n(4)
34296 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034297 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034298 }
34299 }
34300
34301 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__AVX, k_lt_8_strided_a) {
34302 TEST_REQUIRES_X86_AVX;
34303 for (size_t k = 1; k < 8; k++) {
34304 GemmMicrokernelTester()
34305 .extended_weights(true)
34306 .mr(3)
34307 .nr(4)
34308 .kr(8)
34309 .sr(1)
34310 .m(3)
34311 .n(4)
34312 .k(k)
34313 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080034314 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034315 }
34316 }
34317
34318 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__AVX, k_lt_8_subtile) {
34319 TEST_REQUIRES_X86_AVX;
34320 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034321 for (uint32_t n = 1; n <= 4; n++) {
34322 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070034323 GemmMicrokernelTester()
34324 .extended_weights(true)
34325 .mr(3)
34326 .nr(4)
34327 .kr(8)
34328 .sr(1)
34329 .m(m)
34330 .n(n)
34331 .k(k)
34332 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034333 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034334 }
34335 }
34336 }
34337 }
34338
34339 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__AVX, k_gt_8) {
34340 TEST_REQUIRES_X86_AVX;
34341 for (size_t k = 9; k < 16; k++) {
34342 GemmMicrokernelTester()
34343 .extended_weights(true)
34344 .mr(3)
34345 .nr(4)
34346 .kr(8)
34347 .sr(1)
34348 .m(3)
34349 .n(4)
34350 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034351 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034352 }
34353 }
34354
34355 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__AVX, k_gt_8_strided_a) {
34356 TEST_REQUIRES_X86_AVX;
34357 for (size_t k = 9; k < 16; k++) {
34358 GemmMicrokernelTester()
34359 .extended_weights(true)
34360 .mr(3)
34361 .nr(4)
34362 .kr(8)
34363 .sr(1)
34364 .m(3)
34365 .n(4)
34366 .k(k)
34367 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080034368 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034369 }
34370 }
34371
34372 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__AVX, k_gt_8_subtile) {
34373 TEST_REQUIRES_X86_AVX;
34374 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034375 for (uint32_t n = 1; n <= 4; n++) {
34376 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070034377 GemmMicrokernelTester()
34378 .extended_weights(true)
34379 .mr(3)
34380 .nr(4)
34381 .kr(8)
34382 .sr(1)
34383 .m(m)
34384 .n(n)
34385 .k(k)
34386 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034387 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034388 }
34389 }
34390 }
34391 }
34392
34393 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__AVX, k_div_8) {
34394 TEST_REQUIRES_X86_AVX;
34395 for (size_t k = 16; k <= 80; k += 8) {
34396 GemmMicrokernelTester()
34397 .extended_weights(true)
34398 .mr(3)
34399 .nr(4)
34400 .kr(8)
34401 .sr(1)
34402 .m(3)
34403 .n(4)
34404 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034405 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034406 }
34407 }
34408
34409 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__AVX, k_div_8_strided_a) {
34410 TEST_REQUIRES_X86_AVX;
34411 for (size_t k = 16; k <= 80; k += 8) {
34412 GemmMicrokernelTester()
34413 .extended_weights(true)
34414 .mr(3)
34415 .nr(4)
34416 .kr(8)
34417 .sr(1)
34418 .m(3)
34419 .n(4)
34420 .k(k)
34421 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080034422 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034423 }
34424 }
34425
34426 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__AVX, k_div_8_subtile) {
34427 TEST_REQUIRES_X86_AVX;
34428 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034429 for (uint32_t n = 1; n <= 4; n++) {
34430 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070034431 GemmMicrokernelTester()
34432 .extended_weights(true)
34433 .mr(3)
34434 .nr(4)
34435 .kr(8)
34436 .sr(1)
34437 .m(m)
34438 .n(n)
34439 .k(k)
34440 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034441 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034442 }
34443 }
34444 }
34445 }
34446
34447 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__AVX, n_gt_4) {
34448 TEST_REQUIRES_X86_AVX;
34449 for (uint32_t n = 5; n < 8; n++) {
34450 for (size_t k = 1; k <= 40; k += 9) {
34451 GemmMicrokernelTester()
34452 .extended_weights(true)
34453 .mr(3)
34454 .nr(4)
34455 .kr(8)
34456 .sr(1)
34457 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034458 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070034459 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034460 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034461 }
34462 }
34463 }
34464
34465 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__AVX, n_gt_4_strided_cn) {
34466 TEST_REQUIRES_X86_AVX;
34467 for (uint32_t n = 5; n < 8; n++) {
34468 for (size_t k = 1; k <= 40; k += 9) {
34469 GemmMicrokernelTester()
34470 .extended_weights(true)
34471 .mr(3)
34472 .nr(4)
34473 .kr(8)
34474 .sr(1)
34475 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034476 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070034477 .k(k)
34478 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080034479 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034480 }
34481 }
34482 }
34483
34484 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__AVX, n_gt_4_strided_a) {
34485 TEST_REQUIRES_X86_AVX;
34486 for (uint32_t n = 5; n < 8; n++) {
34487 for (size_t k = 1; k <= 40; k += 9) {
34488 GemmMicrokernelTester()
34489 .extended_weights(true)
34490 .mr(3)
34491 .nr(4)
34492 .kr(8)
34493 .sr(1)
34494 .m(3)
34495 .n(n)
34496 .k(k)
34497 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080034498 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034499 }
34500 }
34501 }
34502
34503 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__AVX, n_gt_4_subtile) {
34504 TEST_REQUIRES_X86_AVX;
34505 for (uint32_t n = 5; n < 8; n++) {
34506 for (size_t k = 1; k <= 40; k += 9) {
34507 for (uint32_t m = 1; m <= 3; m++) {
34508 GemmMicrokernelTester()
34509 .extended_weights(true)
34510 .mr(3)
34511 .nr(4)
34512 .kr(8)
34513 .sr(1)
34514 .m(m)
34515 .n(n)
34516 .k(k)
34517 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034518 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034519 }
34520 }
34521 }
34522 }
34523
34524 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__AVX, n_div_4) {
34525 TEST_REQUIRES_X86_AVX;
34526 for (uint32_t n = 8; n <= 12; n += 4) {
34527 for (size_t k = 1; k <= 40; k += 9) {
34528 GemmMicrokernelTester()
34529 .extended_weights(true)
34530 .mr(3)
34531 .nr(4)
34532 .kr(8)
34533 .sr(1)
34534 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034535 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070034536 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034537 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034538 }
34539 }
34540 }
34541
34542 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__AVX, n_div_4_strided_cn) {
34543 TEST_REQUIRES_X86_AVX;
34544 for (uint32_t n = 8; n <= 12; n += 4) {
34545 for (size_t k = 1; k <= 40; k += 9) {
34546 GemmMicrokernelTester()
34547 .extended_weights(true)
34548 .mr(3)
34549 .nr(4)
34550 .kr(8)
34551 .sr(1)
34552 .m(3)
34553 .n(n)
34554 .k(k)
34555 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080034556 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034557 }
34558 }
34559 }
34560
34561 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__AVX, n_div_4_strided_a) {
34562 TEST_REQUIRES_X86_AVX;
34563 for (uint32_t n = 8; n <= 12; n += 4) {
34564 for (size_t k = 1; k <= 40; k += 9) {
34565 GemmMicrokernelTester()
34566 .extended_weights(true)
34567 .mr(3)
34568 .nr(4)
34569 .kr(8)
34570 .sr(1)
34571 .m(3)
34572 .n(n)
34573 .k(k)
34574 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080034575 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034576 }
34577 }
34578 }
34579
34580 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__AVX, n_div_4_subtile) {
34581 TEST_REQUIRES_X86_AVX;
34582 for (uint32_t n = 8; n <= 12; n += 4) {
34583 for (size_t k = 1; k <= 40; k += 9) {
34584 for (uint32_t m = 1; m <= 3; m++) {
34585 GemmMicrokernelTester()
34586 .extended_weights(true)
34587 .mr(3)
34588 .nr(4)
34589 .kr(8)
34590 .sr(1)
34591 .m(m)
34592 .n(n)
34593 .k(k)
34594 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034595 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034596 }
34597 }
34598 }
34599 }
34600
34601 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__AVX, strided_cm_subtile) {
34602 TEST_REQUIRES_X86_AVX;
34603 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034604 for (uint32_t n = 1; n <= 4; n++) {
34605 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070034606 GemmMicrokernelTester()
34607 .extended_weights(true)
34608 .mr(3)
34609 .nr(4)
34610 .kr(8)
34611 .sr(1)
34612 .m(m)
34613 .n(n)
34614 .k(k)
34615 .cm_stride(7)
34616 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034617 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034618 }
34619 }
34620 }
34621 }
34622
34623 TEST(QS8_GEMM_XW_MINMAX_FP32_3X4C8__AVX, strided_cm) {
34624 TEST_REQUIRES_X86_AVX;
34625 GemmMicrokernelTester()
34626 .extended_weights(true)
34627 .mr(3)
34628 .nr(4)
34629 .kr(8)
34630 .sr(1)
34631 .m(3)
34632 .n(4)
34633 .k(8)
34634 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080034635 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034636 }
34637#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
34638
34639
34640#if XNN_ARCH_X86 || XNN_ARCH_X86_64
34641 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__XOP, k_eq_8) {
34642 TEST_REQUIRES_X86_XOP;
34643 GemmMicrokernelTester()
34644 .extended_weights(true)
34645 .mr(1)
34646 .nr(4)
34647 .kr(8)
34648 .sr(1)
34649 .m(1)
34650 .n(4)
34651 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080034652 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034653 }
34654
34655 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__XOP, strided_cn) {
34656 TEST_REQUIRES_X86_XOP;
34657 GemmMicrokernelTester()
34658 .extended_weights(true)
34659 .mr(1)
34660 .nr(4)
34661 .kr(8)
34662 .sr(1)
34663 .m(1)
34664 .n(4)
34665 .k(8)
34666 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080034667 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034668 }
34669
34670 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__XOP, k_eq_8_strided_a) {
34671 TEST_REQUIRES_X86_XOP;
34672 GemmMicrokernelTester()
34673 .extended_weights(true)
34674 .mr(1)
34675 .nr(4)
34676 .kr(8)
34677 .sr(1)
34678 .m(1)
34679 .n(4)
34680 .k(8)
34681 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080034682 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034683 }
34684
34685 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__XOP, k_eq_8_subtile) {
34686 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080034687 for (uint32_t n = 1; n <= 4; n++) {
34688 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070034689 GemmMicrokernelTester()
34690 .extended_weights(true)
34691 .mr(1)
34692 .nr(4)
34693 .kr(8)
34694 .sr(1)
34695 .m(m)
34696 .n(n)
34697 .k(8)
34698 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034699 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034700 }
34701 }
34702 }
34703
34704 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__XOP, k_eq_8_subtile_m) {
34705 TEST_REQUIRES_X86_XOP;
34706 for (uint32_t m = 1; m <= 1; m++) {
34707 GemmMicrokernelTester()
34708 .extended_weights(true)
34709 .mr(1)
34710 .nr(4)
34711 .kr(8)
34712 .sr(1)
34713 .m(m)
34714 .n(4)
34715 .k(8)
34716 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034717 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034718 }
34719 }
34720
34721 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__XOP, k_eq_8_subtile_n) {
34722 TEST_REQUIRES_X86_XOP;
34723 for (uint32_t n = 1; n <= 4; n++) {
34724 GemmMicrokernelTester()
34725 .extended_weights(true)
34726 .mr(1)
34727 .nr(4)
34728 .kr(8)
34729 .sr(1)
34730 .m(1)
34731 .n(n)
34732 .k(8)
34733 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034734 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034735 }
34736 }
34737
34738 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__XOP, k_lt_8) {
34739 TEST_REQUIRES_X86_XOP;
34740 for (size_t k = 1; k < 8; k++) {
34741 GemmMicrokernelTester()
34742 .extended_weights(true)
34743 .mr(1)
34744 .nr(4)
34745 .kr(8)
34746 .sr(1)
34747 .m(1)
34748 .n(4)
34749 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034750 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034751 }
34752 }
34753
34754 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__XOP, k_lt_8_strided_a) {
34755 TEST_REQUIRES_X86_XOP;
34756 for (size_t k = 1; k < 8; k++) {
34757 GemmMicrokernelTester()
34758 .extended_weights(true)
34759 .mr(1)
34760 .nr(4)
34761 .kr(8)
34762 .sr(1)
34763 .m(1)
34764 .n(4)
34765 .k(k)
34766 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080034767 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034768 }
34769 }
34770
34771 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__XOP, k_lt_8_subtile) {
34772 TEST_REQUIRES_X86_XOP;
34773 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034774 for (uint32_t n = 1; n <= 4; n++) {
34775 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070034776 GemmMicrokernelTester()
34777 .extended_weights(true)
34778 .mr(1)
34779 .nr(4)
34780 .kr(8)
34781 .sr(1)
34782 .m(m)
34783 .n(n)
34784 .k(k)
34785 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034786 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034787 }
34788 }
34789 }
34790 }
34791
34792 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__XOP, k_gt_8) {
34793 TEST_REQUIRES_X86_XOP;
34794 for (size_t k = 9; k < 16; k++) {
34795 GemmMicrokernelTester()
34796 .extended_weights(true)
34797 .mr(1)
34798 .nr(4)
34799 .kr(8)
34800 .sr(1)
34801 .m(1)
34802 .n(4)
34803 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034804 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034805 }
34806 }
34807
34808 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__XOP, k_gt_8_strided_a) {
34809 TEST_REQUIRES_X86_XOP;
34810 for (size_t k = 9; k < 16; k++) {
34811 GemmMicrokernelTester()
34812 .extended_weights(true)
34813 .mr(1)
34814 .nr(4)
34815 .kr(8)
34816 .sr(1)
34817 .m(1)
34818 .n(4)
34819 .k(k)
34820 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080034821 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034822 }
34823 }
34824
34825 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__XOP, k_gt_8_subtile) {
34826 TEST_REQUIRES_X86_XOP;
34827 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034828 for (uint32_t n = 1; n <= 4; n++) {
34829 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070034830 GemmMicrokernelTester()
34831 .extended_weights(true)
34832 .mr(1)
34833 .nr(4)
34834 .kr(8)
34835 .sr(1)
34836 .m(m)
34837 .n(n)
34838 .k(k)
34839 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034840 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034841 }
34842 }
34843 }
34844 }
34845
34846 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__XOP, k_div_8) {
34847 TEST_REQUIRES_X86_XOP;
34848 for (size_t k = 16; k <= 80; k += 8) {
34849 GemmMicrokernelTester()
34850 .extended_weights(true)
34851 .mr(1)
34852 .nr(4)
34853 .kr(8)
34854 .sr(1)
34855 .m(1)
34856 .n(4)
34857 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034858 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034859 }
34860 }
34861
34862 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__XOP, k_div_8_strided_a) {
34863 TEST_REQUIRES_X86_XOP;
34864 for (size_t k = 16; k <= 80; k += 8) {
34865 GemmMicrokernelTester()
34866 .extended_weights(true)
34867 .mr(1)
34868 .nr(4)
34869 .kr(8)
34870 .sr(1)
34871 .m(1)
34872 .n(4)
34873 .k(k)
34874 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080034875 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034876 }
34877 }
34878
34879 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__XOP, k_div_8_subtile) {
34880 TEST_REQUIRES_X86_XOP;
34881 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034882 for (uint32_t n = 1; n <= 4; n++) {
34883 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070034884 GemmMicrokernelTester()
34885 .extended_weights(true)
34886 .mr(1)
34887 .nr(4)
34888 .kr(8)
34889 .sr(1)
34890 .m(m)
34891 .n(n)
34892 .k(k)
34893 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034894 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034895 }
34896 }
34897 }
34898 }
34899
34900 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__XOP, n_gt_4) {
34901 TEST_REQUIRES_X86_XOP;
34902 for (uint32_t n = 5; n < 8; n++) {
34903 for (size_t k = 1; k <= 40; k += 9) {
34904 GemmMicrokernelTester()
34905 .extended_weights(true)
34906 .mr(1)
34907 .nr(4)
34908 .kr(8)
34909 .sr(1)
34910 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034911 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070034912 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034913 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034914 }
34915 }
34916 }
34917
34918 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__XOP, n_gt_4_strided_cn) {
34919 TEST_REQUIRES_X86_XOP;
34920 for (uint32_t n = 5; n < 8; n++) {
34921 for (size_t k = 1; k <= 40; k += 9) {
34922 GemmMicrokernelTester()
34923 .extended_weights(true)
34924 .mr(1)
34925 .nr(4)
34926 .kr(8)
34927 .sr(1)
34928 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034929 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070034930 .k(k)
34931 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080034932 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034933 }
34934 }
34935 }
34936
34937 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__XOP, n_gt_4_strided_a) {
34938 TEST_REQUIRES_X86_XOP;
34939 for (uint32_t n = 5; n < 8; n++) {
34940 for (size_t k = 1; k <= 40; k += 9) {
34941 GemmMicrokernelTester()
34942 .extended_weights(true)
34943 .mr(1)
34944 .nr(4)
34945 .kr(8)
34946 .sr(1)
34947 .m(1)
34948 .n(n)
34949 .k(k)
34950 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080034951 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034952 }
34953 }
34954 }
34955
34956 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__XOP, n_gt_4_subtile) {
34957 TEST_REQUIRES_X86_XOP;
34958 for (uint32_t n = 5; n < 8; n++) {
34959 for (size_t k = 1; k <= 40; k += 9) {
34960 for (uint32_t m = 1; m <= 1; m++) {
34961 GemmMicrokernelTester()
34962 .extended_weights(true)
34963 .mr(1)
34964 .nr(4)
34965 .kr(8)
34966 .sr(1)
34967 .m(m)
34968 .n(n)
34969 .k(k)
34970 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034971 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034972 }
34973 }
34974 }
34975 }
34976
34977 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__XOP, n_div_4) {
34978 TEST_REQUIRES_X86_XOP;
34979 for (uint32_t n = 8; n <= 12; n += 4) {
34980 for (size_t k = 1; k <= 40; k += 9) {
34981 GemmMicrokernelTester()
34982 .extended_weights(true)
34983 .mr(1)
34984 .nr(4)
34985 .kr(8)
34986 .sr(1)
34987 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034988 .n(n)
Marat Dukhan0ff79892021-08-06 16:05:06 -070034989 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034990 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070034991 }
34992 }
34993 }
34994
34995 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__XOP, n_div_4_strided_cn) {
34996 TEST_REQUIRES_X86_XOP;
34997 for (uint32_t n = 8; n <= 12; n += 4) {
34998 for (size_t k = 1; k <= 40; k += 9) {
34999 GemmMicrokernelTester()
35000 .extended_weights(true)
35001 .mr(1)
35002 .nr(4)
35003 .kr(8)
35004 .sr(1)
35005 .m(1)
35006 .n(n)
35007 .k(k)
35008 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080035009 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070035010 }
35011 }
35012 }
35013
35014 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__XOP, n_div_4_strided_a) {
35015 TEST_REQUIRES_X86_XOP;
35016 for (uint32_t n = 8; n <= 12; n += 4) {
35017 for (size_t k = 1; k <= 40; k += 9) {
35018 GemmMicrokernelTester()
35019 .extended_weights(true)
35020 .mr(1)
35021 .nr(4)
35022 .kr(8)
35023 .sr(1)
35024 .m(1)
35025 .n(n)
35026 .k(k)
35027 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080035028 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070035029 }
35030 }
35031 }
35032
35033 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__XOP, n_div_4_subtile) {
35034 TEST_REQUIRES_X86_XOP;
35035 for (uint32_t n = 8; n <= 12; n += 4) {
35036 for (size_t k = 1; k <= 40; k += 9) {
35037 for (uint32_t m = 1; m <= 1; m++) {
35038 GemmMicrokernelTester()
35039 .extended_weights(true)
35040 .mr(1)
35041 .nr(4)
35042 .kr(8)
35043 .sr(1)
35044 .m(m)
35045 .n(n)
35046 .k(k)
35047 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035048 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070035049 }
35050 }
35051 }
35052 }
35053
35054 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__XOP, strided_cm_subtile) {
35055 TEST_REQUIRES_X86_XOP;
35056 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035057 for (uint32_t n = 1; n <= 4; n++) {
35058 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0ff79892021-08-06 16:05:06 -070035059 GemmMicrokernelTester()
35060 .extended_weights(true)
35061 .mr(1)
35062 .nr(4)
35063 .kr(8)
35064 .sr(1)
35065 .m(m)
35066 .n(n)
35067 .k(k)
35068 .cm_stride(7)
35069 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035070 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070035071 }
35072 }
35073 }
35074 }
35075
35076 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__XOP, strided_cm) {
35077 TEST_REQUIRES_X86_XOP;
35078 GemmMicrokernelTester()
35079 .extended_weights(true)
35080 .mr(1)
35081 .nr(4)
35082 .kr(8)
35083 .sr(1)
35084 .m(1)
35085 .n(4)
35086 .k(8)
35087 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080035088 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
Marat Dukhan0ff79892021-08-06 16:05:06 -070035089 }
35090#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
35091
35092
35093#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035094 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8) {
35095 TEST_REQUIRES_X86_AVX2;
35096 GemmMicrokernelTester()
35097 .mr(2)
35098 .nr(8)
35099 .kr(8)
35100 .sr(1)
35101 .m(2)
35102 .n(8)
35103 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080035104 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035105 }
35106
35107 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, strided_cn) {
35108 TEST_REQUIRES_X86_AVX2;
35109 GemmMicrokernelTester()
35110 .mr(2)
35111 .nr(8)
35112 .kr(8)
35113 .sr(1)
35114 .m(2)
35115 .n(8)
35116 .k(8)
35117 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080035118 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035119 }
35120
35121 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_strided_a) {
35122 TEST_REQUIRES_X86_AVX2;
35123 GemmMicrokernelTester()
35124 .mr(2)
35125 .nr(8)
35126 .kr(8)
35127 .sr(1)
35128 .m(2)
35129 .n(8)
35130 .k(8)
35131 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080035132 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035133 }
35134
35135 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile) {
35136 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080035137 for (uint32_t n = 1; n <= 8; n++) {
35138 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035139 GemmMicrokernelTester()
35140 .mr(2)
35141 .nr(8)
35142 .kr(8)
35143 .sr(1)
35144 .m(m)
35145 .n(n)
35146 .k(8)
35147 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035148 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035149 }
35150 }
35151 }
35152
35153 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_m) {
35154 TEST_REQUIRES_X86_AVX2;
35155 for (uint32_t m = 1; m <= 2; m++) {
35156 GemmMicrokernelTester()
35157 .mr(2)
35158 .nr(8)
35159 .kr(8)
35160 .sr(1)
35161 .m(m)
35162 .n(8)
35163 .k(8)
35164 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035165 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035166 }
35167 }
35168
35169 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_n) {
35170 TEST_REQUIRES_X86_AVX2;
35171 for (uint32_t n = 1; n <= 8; n++) {
35172 GemmMicrokernelTester()
35173 .mr(2)
35174 .nr(8)
35175 .kr(8)
35176 .sr(1)
35177 .m(2)
35178 .n(n)
35179 .k(8)
35180 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035181 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035182 }
35183 }
35184
35185 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8) {
35186 TEST_REQUIRES_X86_AVX2;
35187 for (size_t k = 1; k < 8; k++) {
35188 GemmMicrokernelTester()
35189 .mr(2)
35190 .nr(8)
35191 .kr(8)
35192 .sr(1)
35193 .m(2)
35194 .n(8)
35195 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080035196 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035197 }
35198 }
35199
35200 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8_strided_a) {
35201 TEST_REQUIRES_X86_AVX2;
35202 for (size_t k = 1; k < 8; k++) {
35203 GemmMicrokernelTester()
35204 .mr(2)
35205 .nr(8)
35206 .kr(8)
35207 .sr(1)
35208 .m(2)
35209 .n(8)
35210 .k(k)
35211 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080035212 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035213 }
35214 }
35215
35216 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8_subtile) {
35217 TEST_REQUIRES_X86_AVX2;
35218 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035219 for (uint32_t n = 1; n <= 8; n++) {
35220 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035221 GemmMicrokernelTester()
35222 .mr(2)
35223 .nr(8)
35224 .kr(8)
35225 .sr(1)
35226 .m(m)
35227 .n(n)
35228 .k(k)
35229 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035230 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035231 }
35232 }
35233 }
35234 }
35235
35236 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8) {
35237 TEST_REQUIRES_X86_AVX2;
35238 for (size_t k = 9; k < 16; k++) {
35239 GemmMicrokernelTester()
35240 .mr(2)
35241 .nr(8)
35242 .kr(8)
35243 .sr(1)
35244 .m(2)
35245 .n(8)
35246 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080035247 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035248 }
35249 }
35250
35251 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8_strided_a) {
35252 TEST_REQUIRES_X86_AVX2;
35253 for (size_t k = 9; k < 16; k++) {
35254 GemmMicrokernelTester()
35255 .mr(2)
35256 .nr(8)
35257 .kr(8)
35258 .sr(1)
35259 .m(2)
35260 .n(8)
35261 .k(k)
35262 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080035263 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035264 }
35265 }
35266
35267 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8_subtile) {
35268 TEST_REQUIRES_X86_AVX2;
35269 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035270 for (uint32_t n = 1; n <= 8; n++) {
35271 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035272 GemmMicrokernelTester()
35273 .mr(2)
35274 .nr(8)
35275 .kr(8)
35276 .sr(1)
35277 .m(m)
35278 .n(n)
35279 .k(k)
35280 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035281 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035282 }
35283 }
35284 }
35285 }
35286
35287 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8) {
35288 TEST_REQUIRES_X86_AVX2;
35289 for (size_t k = 16; k <= 80; k += 8) {
35290 GemmMicrokernelTester()
35291 .mr(2)
35292 .nr(8)
35293 .kr(8)
35294 .sr(1)
35295 .m(2)
35296 .n(8)
35297 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080035298 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035299 }
35300 }
35301
35302 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8_strided_a) {
35303 TEST_REQUIRES_X86_AVX2;
35304 for (size_t k = 16; k <= 80; k += 8) {
35305 GemmMicrokernelTester()
35306 .mr(2)
35307 .nr(8)
35308 .kr(8)
35309 .sr(1)
35310 .m(2)
35311 .n(8)
35312 .k(k)
35313 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080035314 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035315 }
35316 }
35317
35318 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8_subtile) {
35319 TEST_REQUIRES_X86_AVX2;
35320 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035321 for (uint32_t n = 1; n <= 8; n++) {
35322 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035323 GemmMicrokernelTester()
35324 .mr(2)
35325 .nr(8)
35326 .kr(8)
35327 .sr(1)
35328 .m(m)
35329 .n(n)
35330 .k(k)
35331 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035332 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035333 }
35334 }
35335 }
35336 }
35337
35338 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8) {
35339 TEST_REQUIRES_X86_AVX2;
35340 for (uint32_t n = 9; n < 16; n++) {
35341 for (size_t k = 1; k <= 40; k += 9) {
35342 GemmMicrokernelTester()
35343 .mr(2)
35344 .nr(8)
35345 .kr(8)
35346 .sr(1)
35347 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080035348 .n(n)
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035349 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080035350 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035351 }
35352 }
35353 }
35354
35355 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_strided_cn) {
35356 TEST_REQUIRES_X86_AVX2;
35357 for (uint32_t n = 9; n < 16; n++) {
35358 for (size_t k = 1; k <= 40; k += 9) {
35359 GemmMicrokernelTester()
35360 .mr(2)
35361 .nr(8)
35362 .kr(8)
35363 .sr(1)
35364 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080035365 .n(n)
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035366 .k(k)
35367 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080035368 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035369 }
35370 }
35371 }
35372
35373 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_strided_a) {
35374 TEST_REQUIRES_X86_AVX2;
35375 for (uint32_t n = 9; n < 16; n++) {
35376 for (size_t k = 1; k <= 40; k += 9) {
35377 GemmMicrokernelTester()
35378 .mr(2)
35379 .nr(8)
35380 .kr(8)
35381 .sr(1)
35382 .m(2)
35383 .n(n)
35384 .k(k)
35385 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080035386 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035387 }
35388 }
35389 }
35390
35391 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_subtile) {
35392 TEST_REQUIRES_X86_AVX2;
35393 for (uint32_t n = 9; n < 16; n++) {
35394 for (size_t k = 1; k <= 40; k += 9) {
35395 for (uint32_t m = 1; m <= 2; m++) {
35396 GemmMicrokernelTester()
35397 .mr(2)
35398 .nr(8)
35399 .kr(8)
35400 .sr(1)
35401 .m(m)
35402 .n(n)
35403 .k(k)
35404 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035405 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035406 }
35407 }
35408 }
35409 }
35410
35411 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8) {
35412 TEST_REQUIRES_X86_AVX2;
35413 for (uint32_t n = 16; n <= 24; n += 8) {
35414 for (size_t k = 1; k <= 40; k += 9) {
35415 GemmMicrokernelTester()
35416 .mr(2)
35417 .nr(8)
35418 .kr(8)
35419 .sr(1)
35420 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080035421 .n(n)
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035422 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080035423 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035424 }
35425 }
35426 }
35427
35428 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_strided_cn) {
35429 TEST_REQUIRES_X86_AVX2;
35430 for (uint32_t n = 16; n <= 24; n += 8) {
35431 for (size_t k = 1; k <= 40; k += 9) {
35432 GemmMicrokernelTester()
35433 .mr(2)
35434 .nr(8)
35435 .kr(8)
35436 .sr(1)
35437 .m(2)
35438 .n(n)
35439 .k(k)
35440 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080035441 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035442 }
35443 }
35444 }
35445
35446 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_strided_a) {
35447 TEST_REQUIRES_X86_AVX2;
35448 for (uint32_t n = 16; n <= 24; n += 8) {
35449 for (size_t k = 1; k <= 40; k += 9) {
35450 GemmMicrokernelTester()
35451 .mr(2)
35452 .nr(8)
35453 .kr(8)
35454 .sr(1)
35455 .m(2)
35456 .n(n)
35457 .k(k)
35458 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080035459 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035460 }
35461 }
35462 }
35463
35464 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_subtile) {
35465 TEST_REQUIRES_X86_AVX2;
35466 for (uint32_t n = 16; n <= 24; n += 8) {
35467 for (size_t k = 1; k <= 40; k += 9) {
35468 for (uint32_t m = 1; m <= 2; m++) {
35469 GemmMicrokernelTester()
35470 .mr(2)
35471 .nr(8)
35472 .kr(8)
35473 .sr(1)
35474 .m(m)
35475 .n(n)
35476 .k(k)
35477 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035478 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035479 }
35480 }
35481 }
35482 }
35483
35484 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, strided_cm_subtile) {
35485 TEST_REQUIRES_X86_AVX2;
35486 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035487 for (uint32_t n = 1; n <= 8; n++) {
35488 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035489 GemmMicrokernelTester()
35490 .mr(2)
35491 .nr(8)
35492 .kr(8)
35493 .sr(1)
35494 .m(m)
35495 .n(n)
35496 .k(k)
35497 .cm_stride(11)
35498 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035499 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035500 }
35501 }
35502 }
35503 }
35504
35505 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, qmin) {
35506 TEST_REQUIRES_X86_AVX2;
35507 GemmMicrokernelTester()
35508 .mr(2)
35509 .nr(8)
35510 .kr(8)
35511 .sr(1)
35512 .m(2)
35513 .n(8)
35514 .k(8)
35515 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080035516 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035517 }
35518
35519 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, qmax) {
35520 TEST_REQUIRES_X86_AVX2;
35521 GemmMicrokernelTester()
35522 .mr(2)
35523 .nr(8)
35524 .kr(8)
35525 .sr(1)
35526 .m(2)
35527 .n(8)
35528 .k(8)
35529 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080035530 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035531 }
35532
35533 TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, strided_cm) {
35534 TEST_REQUIRES_X86_AVX2;
35535 GemmMicrokernelTester()
35536 .mr(2)
35537 .nr(8)
35538 .kr(8)
35539 .sr(1)
35540 .m(2)
35541 .n(8)
35542 .k(8)
35543 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080035544 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035545 }
35546#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
35547
35548
35549#if XNN_ARCH_X86 || XNN_ARCH_X86_64
35550 TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8) {
35551 TEST_REQUIRES_X86_AVX2;
35552 GemmMicrokernelTester()
35553 .mr(3)
35554 .nr(8)
35555 .kr(8)
35556 .sr(1)
35557 .m(3)
35558 .n(8)
35559 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080035560 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035561 }
35562
35563 TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, strided_cn) {
35564 TEST_REQUIRES_X86_AVX2;
35565 GemmMicrokernelTester()
35566 .mr(3)
35567 .nr(8)
35568 .kr(8)
35569 .sr(1)
35570 .m(3)
35571 .n(8)
35572 .k(8)
35573 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080035574 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035575 }
35576
35577 TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_strided_a) {
35578 TEST_REQUIRES_X86_AVX2;
35579 GemmMicrokernelTester()
35580 .mr(3)
35581 .nr(8)
35582 .kr(8)
35583 .sr(1)
35584 .m(3)
35585 .n(8)
35586 .k(8)
35587 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080035588 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035589 }
35590
35591 TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile) {
35592 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080035593 for (uint32_t n = 1; n <= 8; n++) {
35594 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035595 GemmMicrokernelTester()
35596 .mr(3)
35597 .nr(8)
35598 .kr(8)
35599 .sr(1)
35600 .m(m)
35601 .n(n)
35602 .k(8)
35603 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035604 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035605 }
35606 }
35607 }
35608
35609 TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_m) {
35610 TEST_REQUIRES_X86_AVX2;
35611 for (uint32_t m = 1; m <= 3; m++) {
35612 GemmMicrokernelTester()
35613 .mr(3)
35614 .nr(8)
35615 .kr(8)
35616 .sr(1)
35617 .m(m)
35618 .n(8)
35619 .k(8)
35620 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035621 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035622 }
35623 }
35624
35625 TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_n) {
35626 TEST_REQUIRES_X86_AVX2;
35627 for (uint32_t n = 1; n <= 8; n++) {
35628 GemmMicrokernelTester()
35629 .mr(3)
35630 .nr(8)
35631 .kr(8)
35632 .sr(1)
35633 .m(3)
35634 .n(n)
35635 .k(8)
35636 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035637 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035638 }
35639 }
35640
35641 TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8) {
35642 TEST_REQUIRES_X86_AVX2;
35643 for (size_t k = 1; k < 8; k++) {
35644 GemmMicrokernelTester()
35645 .mr(3)
35646 .nr(8)
35647 .kr(8)
35648 .sr(1)
35649 .m(3)
35650 .n(8)
35651 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080035652 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035653 }
35654 }
35655
35656 TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8_strided_a) {
35657 TEST_REQUIRES_X86_AVX2;
35658 for (size_t k = 1; k < 8; k++) {
35659 GemmMicrokernelTester()
35660 .mr(3)
35661 .nr(8)
35662 .kr(8)
35663 .sr(1)
35664 .m(3)
35665 .n(8)
35666 .k(k)
35667 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080035668 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035669 }
35670 }
35671
35672 TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8_subtile) {
35673 TEST_REQUIRES_X86_AVX2;
35674 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035675 for (uint32_t n = 1; n <= 8; n++) {
35676 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035677 GemmMicrokernelTester()
35678 .mr(3)
35679 .nr(8)
35680 .kr(8)
35681 .sr(1)
35682 .m(m)
35683 .n(n)
35684 .k(k)
35685 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035686 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035687 }
35688 }
35689 }
35690 }
35691
35692 TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8) {
35693 TEST_REQUIRES_X86_AVX2;
35694 for (size_t k = 9; k < 16; k++) {
35695 GemmMicrokernelTester()
35696 .mr(3)
35697 .nr(8)
35698 .kr(8)
35699 .sr(1)
35700 .m(3)
35701 .n(8)
35702 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080035703 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035704 }
35705 }
35706
35707 TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8_strided_a) {
35708 TEST_REQUIRES_X86_AVX2;
35709 for (size_t k = 9; k < 16; k++) {
35710 GemmMicrokernelTester()
35711 .mr(3)
35712 .nr(8)
35713 .kr(8)
35714 .sr(1)
35715 .m(3)
35716 .n(8)
35717 .k(k)
35718 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080035719 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035720 }
35721 }
35722
35723 TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8_subtile) {
35724 TEST_REQUIRES_X86_AVX2;
35725 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035726 for (uint32_t n = 1; n <= 8; n++) {
35727 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035728 GemmMicrokernelTester()
35729 .mr(3)
35730 .nr(8)
35731 .kr(8)
35732 .sr(1)
35733 .m(m)
35734 .n(n)
35735 .k(k)
35736 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035737 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035738 }
35739 }
35740 }
35741 }
35742
35743 TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8) {
35744 TEST_REQUIRES_X86_AVX2;
35745 for (size_t k = 16; k <= 80; k += 8) {
35746 GemmMicrokernelTester()
35747 .mr(3)
35748 .nr(8)
35749 .kr(8)
35750 .sr(1)
35751 .m(3)
35752 .n(8)
35753 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080035754 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035755 }
35756 }
35757
35758 TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8_strided_a) {
35759 TEST_REQUIRES_X86_AVX2;
35760 for (size_t k = 16; k <= 80; k += 8) {
35761 GemmMicrokernelTester()
35762 .mr(3)
35763 .nr(8)
35764 .kr(8)
35765 .sr(1)
35766 .m(3)
35767 .n(8)
35768 .k(k)
35769 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080035770 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035771 }
35772 }
35773
35774 TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8_subtile) {
35775 TEST_REQUIRES_X86_AVX2;
35776 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035777 for (uint32_t n = 1; n <= 8; n++) {
35778 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035779 GemmMicrokernelTester()
35780 .mr(3)
35781 .nr(8)
35782 .kr(8)
35783 .sr(1)
35784 .m(m)
35785 .n(n)
35786 .k(k)
35787 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035788 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035789 }
35790 }
35791 }
35792 }
35793
35794 TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8) {
35795 TEST_REQUIRES_X86_AVX2;
35796 for (uint32_t n = 9; n < 16; n++) {
35797 for (size_t k = 1; k <= 40; k += 9) {
35798 GemmMicrokernelTester()
35799 .mr(3)
35800 .nr(8)
35801 .kr(8)
35802 .sr(1)
35803 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080035804 .n(n)
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035805 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080035806 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035807 }
35808 }
35809 }
35810
35811 TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_strided_cn) {
35812 TEST_REQUIRES_X86_AVX2;
35813 for (uint32_t n = 9; n < 16; n++) {
35814 for (size_t k = 1; k <= 40; k += 9) {
35815 GemmMicrokernelTester()
35816 .mr(3)
35817 .nr(8)
35818 .kr(8)
35819 .sr(1)
35820 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080035821 .n(n)
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035822 .k(k)
35823 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080035824 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035825 }
35826 }
35827 }
35828
35829 TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_strided_a) {
35830 TEST_REQUIRES_X86_AVX2;
35831 for (uint32_t n = 9; n < 16; n++) {
35832 for (size_t k = 1; k <= 40; k += 9) {
35833 GemmMicrokernelTester()
35834 .mr(3)
35835 .nr(8)
35836 .kr(8)
35837 .sr(1)
35838 .m(3)
35839 .n(n)
35840 .k(k)
35841 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080035842 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035843 }
35844 }
35845 }
35846
35847 TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_subtile) {
35848 TEST_REQUIRES_X86_AVX2;
35849 for (uint32_t n = 9; n < 16; n++) {
35850 for (size_t k = 1; k <= 40; k += 9) {
35851 for (uint32_t m = 1; m <= 3; m++) {
35852 GemmMicrokernelTester()
35853 .mr(3)
35854 .nr(8)
35855 .kr(8)
35856 .sr(1)
35857 .m(m)
35858 .n(n)
35859 .k(k)
35860 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035861 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035862 }
35863 }
35864 }
35865 }
35866
35867 TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8) {
35868 TEST_REQUIRES_X86_AVX2;
35869 for (uint32_t n = 16; n <= 24; n += 8) {
35870 for (size_t k = 1; k <= 40; k += 9) {
35871 GemmMicrokernelTester()
35872 .mr(3)
35873 .nr(8)
35874 .kr(8)
35875 .sr(1)
35876 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080035877 .n(n)
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035878 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080035879 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035880 }
35881 }
35882 }
35883
35884 TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_strided_cn) {
35885 TEST_REQUIRES_X86_AVX2;
35886 for (uint32_t n = 16; n <= 24; n += 8) {
35887 for (size_t k = 1; k <= 40; k += 9) {
35888 GemmMicrokernelTester()
35889 .mr(3)
35890 .nr(8)
35891 .kr(8)
35892 .sr(1)
35893 .m(3)
35894 .n(n)
35895 .k(k)
35896 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080035897 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035898 }
35899 }
35900 }
35901
35902 TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_strided_a) {
35903 TEST_REQUIRES_X86_AVX2;
35904 for (uint32_t n = 16; n <= 24; n += 8) {
35905 for (size_t k = 1; k <= 40; k += 9) {
35906 GemmMicrokernelTester()
35907 .mr(3)
35908 .nr(8)
35909 .kr(8)
35910 .sr(1)
35911 .m(3)
35912 .n(n)
35913 .k(k)
35914 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080035915 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035916 }
35917 }
35918 }
35919
35920 TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_subtile) {
35921 TEST_REQUIRES_X86_AVX2;
35922 for (uint32_t n = 16; n <= 24; n += 8) {
35923 for (size_t k = 1; k <= 40; k += 9) {
35924 for (uint32_t m = 1; m <= 3; m++) {
35925 GemmMicrokernelTester()
35926 .mr(3)
35927 .nr(8)
35928 .kr(8)
35929 .sr(1)
35930 .m(m)
35931 .n(n)
35932 .k(k)
35933 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035934 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035935 }
35936 }
35937 }
35938 }
35939
35940 TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, strided_cm_subtile) {
35941 TEST_REQUIRES_X86_AVX2;
35942 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035943 for (uint32_t n = 1; n <= 8; n++) {
35944 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035945 GemmMicrokernelTester()
35946 .mr(3)
35947 .nr(8)
35948 .kr(8)
35949 .sr(1)
35950 .m(m)
35951 .n(n)
35952 .k(k)
35953 .cm_stride(11)
35954 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035955 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035956 }
35957 }
35958 }
35959 }
35960
35961 TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, qmin) {
35962 TEST_REQUIRES_X86_AVX2;
35963 GemmMicrokernelTester()
35964 .mr(3)
35965 .nr(8)
35966 .kr(8)
35967 .sr(1)
35968 .m(3)
35969 .n(8)
35970 .k(8)
35971 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080035972 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035973 }
35974
35975 TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, qmax) {
35976 TEST_REQUIRES_X86_AVX2;
35977 GemmMicrokernelTester()
35978 .mr(3)
35979 .nr(8)
35980 .kr(8)
35981 .sr(1)
35982 .m(3)
35983 .n(8)
35984 .k(8)
35985 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080035986 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070035987 }
35988
35989 TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, strided_cm) {
35990 TEST_REQUIRES_X86_AVX2;
35991 GemmMicrokernelTester()
35992 .mr(3)
35993 .nr(8)
35994 .kr(8)
35995 .sr(1)
35996 .m(3)
35997 .n(8)
35998 .k(8)
35999 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080036000 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan9b474cf2021-05-25 16:37:48 -070036001 }
36002#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan71855ee2021-05-25 19:05:06 -070036003
36004
36005#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan0b043742021-06-02 18:29:11 -070036006 TEST(QS8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_eq_8) {
36007 TEST_REQUIRES_X86_AVX2;
36008 GemmMicrokernelTester()
36009 .extended_weights(true)
36010 .mr(1)
36011 .nr(8)
36012 .kr(8)
36013 .sr(1)
36014 .m(1)
36015 .n(8)
36016 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080036017 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0b043742021-06-02 18:29:11 -070036018 }
36019
36020 TEST(QS8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, strided_cn) {
36021 TEST_REQUIRES_X86_AVX2;
36022 GemmMicrokernelTester()
36023 .extended_weights(true)
36024 .mr(1)
36025 .nr(8)
36026 .kr(8)
36027 .sr(1)
36028 .m(1)
36029 .n(8)
36030 .k(8)
36031 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080036032 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0b043742021-06-02 18:29:11 -070036033 }
36034
36035 TEST(QS8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_eq_8_strided_a) {
36036 TEST_REQUIRES_X86_AVX2;
36037 GemmMicrokernelTester()
36038 .extended_weights(true)
36039 .mr(1)
36040 .nr(8)
36041 .kr(8)
36042 .sr(1)
36043 .m(1)
36044 .n(8)
36045 .k(8)
36046 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080036047 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0b043742021-06-02 18:29:11 -070036048 }
36049
36050 TEST(QS8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile) {
36051 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080036052 for (uint32_t n = 1; n <= 8; n++) {
36053 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0b043742021-06-02 18:29:11 -070036054 GemmMicrokernelTester()
36055 .extended_weights(true)
36056 .mr(1)
36057 .nr(8)
36058 .kr(8)
36059 .sr(1)
36060 .m(m)
36061 .n(n)
36062 .k(8)
36063 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036064 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0b043742021-06-02 18:29:11 -070036065 }
36066 }
36067 }
36068
36069 TEST(QS8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_m) {
36070 TEST_REQUIRES_X86_AVX2;
36071 for (uint32_t m = 1; m <= 1; m++) {
36072 GemmMicrokernelTester()
36073 .extended_weights(true)
36074 .mr(1)
36075 .nr(8)
36076 .kr(8)
36077 .sr(1)
36078 .m(m)
36079 .n(8)
36080 .k(8)
36081 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036082 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0b043742021-06-02 18:29:11 -070036083 }
36084 }
36085
36086 TEST(QS8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_n) {
36087 TEST_REQUIRES_X86_AVX2;
36088 for (uint32_t n = 1; n <= 8; n++) {
36089 GemmMicrokernelTester()
36090 .extended_weights(true)
36091 .mr(1)
36092 .nr(8)
36093 .kr(8)
36094 .sr(1)
36095 .m(1)
36096 .n(n)
36097 .k(8)
36098 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036099 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0b043742021-06-02 18:29:11 -070036100 }
36101 }
36102
36103 TEST(QS8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_lt_8) {
36104 TEST_REQUIRES_X86_AVX2;
36105 for (size_t k = 1; k < 8; k++) {
36106 GemmMicrokernelTester()
36107 .extended_weights(true)
36108 .mr(1)
36109 .nr(8)
36110 .kr(8)
36111 .sr(1)
36112 .m(1)
36113 .n(8)
36114 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080036115 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0b043742021-06-02 18:29:11 -070036116 }
36117 }
36118
36119 TEST(QS8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_lt_8_strided_a) {
36120 TEST_REQUIRES_X86_AVX2;
36121 for (size_t k = 1; k < 8; k++) {
36122 GemmMicrokernelTester()
36123 .extended_weights(true)
36124 .mr(1)
36125 .nr(8)
36126 .kr(8)
36127 .sr(1)
36128 .m(1)
36129 .n(8)
36130 .k(k)
36131 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080036132 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0b043742021-06-02 18:29:11 -070036133 }
36134 }
36135
36136 TEST(QS8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_lt_8_subtile) {
36137 TEST_REQUIRES_X86_AVX2;
36138 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080036139 for (uint32_t n = 1; n <= 8; n++) {
36140 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0b043742021-06-02 18:29:11 -070036141 GemmMicrokernelTester()
36142 .extended_weights(true)
36143 .mr(1)
36144 .nr(8)
36145 .kr(8)
36146 .sr(1)
36147 .m(m)
36148 .n(n)
36149 .k(k)
36150 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036151 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0b043742021-06-02 18:29:11 -070036152 }
36153 }
36154 }
36155 }
36156
36157 TEST(QS8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_gt_8) {
36158 TEST_REQUIRES_X86_AVX2;
36159 for (size_t k = 9; k < 16; k++) {
36160 GemmMicrokernelTester()
36161 .extended_weights(true)
36162 .mr(1)
36163 .nr(8)
36164 .kr(8)
36165 .sr(1)
36166 .m(1)
36167 .n(8)
36168 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080036169 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0b043742021-06-02 18:29:11 -070036170 }
36171 }
36172
36173 TEST(QS8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_gt_8_strided_a) {
36174 TEST_REQUIRES_X86_AVX2;
36175 for (size_t k = 9; k < 16; k++) {
36176 GemmMicrokernelTester()
36177 .extended_weights(true)
36178 .mr(1)
36179 .nr(8)
36180 .kr(8)
36181 .sr(1)
36182 .m(1)
36183 .n(8)
36184 .k(k)
36185 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080036186 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0b043742021-06-02 18:29:11 -070036187 }
36188 }
36189
36190 TEST(QS8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_gt_8_subtile) {
36191 TEST_REQUIRES_X86_AVX2;
36192 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080036193 for (uint32_t n = 1; n <= 8; n++) {
36194 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0b043742021-06-02 18:29:11 -070036195 GemmMicrokernelTester()
36196 .extended_weights(true)
36197 .mr(1)
36198 .nr(8)
36199 .kr(8)
36200 .sr(1)
36201 .m(m)
36202 .n(n)
36203 .k(k)
36204 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036205 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0b043742021-06-02 18:29:11 -070036206 }
36207 }
36208 }
36209 }
36210
36211 TEST(QS8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_div_8) {
36212 TEST_REQUIRES_X86_AVX2;
36213 for (size_t k = 16; k <= 80; k += 8) {
36214 GemmMicrokernelTester()
36215 .extended_weights(true)
36216 .mr(1)
36217 .nr(8)
36218 .kr(8)
36219 .sr(1)
36220 .m(1)
36221 .n(8)
36222 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080036223 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0b043742021-06-02 18:29:11 -070036224 }
36225 }
36226
36227 TEST(QS8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_div_8_strided_a) {
36228 TEST_REQUIRES_X86_AVX2;
36229 for (size_t k = 16; k <= 80; k += 8) {
36230 GemmMicrokernelTester()
36231 .extended_weights(true)
36232 .mr(1)
36233 .nr(8)
36234 .kr(8)
36235 .sr(1)
36236 .m(1)
36237 .n(8)
36238 .k(k)
36239 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080036240 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0b043742021-06-02 18:29:11 -070036241 }
36242 }
36243
36244 TEST(QS8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, k_div_8_subtile) {
36245 TEST_REQUIRES_X86_AVX2;
36246 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080036247 for (uint32_t n = 1; n <= 8; n++) {
36248 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0b043742021-06-02 18:29:11 -070036249 GemmMicrokernelTester()
36250 .extended_weights(true)
36251 .mr(1)
36252 .nr(8)
36253 .kr(8)
36254 .sr(1)
36255 .m(m)
36256 .n(n)
36257 .k(k)
36258 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036259 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0b043742021-06-02 18:29:11 -070036260 }
36261 }
36262 }
36263 }
36264
36265 TEST(QS8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, n_gt_8) {
36266 TEST_REQUIRES_X86_AVX2;
36267 for (uint32_t n = 9; n < 16; n++) {
36268 for (size_t k = 1; k <= 40; k += 9) {
36269 GemmMicrokernelTester()
36270 .extended_weights(true)
36271 .mr(1)
36272 .nr(8)
36273 .kr(8)
36274 .sr(1)
36275 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080036276 .n(n)
Marat Dukhan0b043742021-06-02 18:29:11 -070036277 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080036278 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0b043742021-06-02 18:29:11 -070036279 }
36280 }
36281 }
36282
36283 TEST(QS8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, n_gt_8_strided_cn) {
36284 TEST_REQUIRES_X86_AVX2;
36285 for (uint32_t n = 9; n < 16; n++) {
36286 for (size_t k = 1; k <= 40; k += 9) {
36287 GemmMicrokernelTester()
36288 .extended_weights(true)
36289 .mr(1)
36290 .nr(8)
36291 .kr(8)
36292 .sr(1)
36293 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080036294 .n(n)
Marat Dukhan0b043742021-06-02 18:29:11 -070036295 .k(k)
36296 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080036297 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0b043742021-06-02 18:29:11 -070036298 }
36299 }
36300 }
36301
36302 TEST(QS8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, n_gt_8_strided_a) {
36303 TEST_REQUIRES_X86_AVX2;
36304 for (uint32_t n = 9; n < 16; n++) {
36305 for (size_t k = 1; k <= 40; k += 9) {
36306 GemmMicrokernelTester()
36307 .extended_weights(true)
36308 .mr(1)
36309 .nr(8)
36310 .kr(8)
36311 .sr(1)
36312 .m(1)
36313 .n(n)
36314 .k(k)
36315 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080036316 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0b043742021-06-02 18:29:11 -070036317 }
36318 }
36319 }
36320
36321 TEST(QS8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, n_gt_8_subtile) {
36322 TEST_REQUIRES_X86_AVX2;
36323 for (uint32_t n = 9; n < 16; n++) {
36324 for (size_t k = 1; k <= 40; k += 9) {
36325 for (uint32_t m = 1; m <= 1; m++) {
36326 GemmMicrokernelTester()
36327 .extended_weights(true)
36328 .mr(1)
36329 .nr(8)
36330 .kr(8)
36331 .sr(1)
36332 .m(m)
36333 .n(n)
36334 .k(k)
36335 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036336 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0b043742021-06-02 18:29:11 -070036337 }
36338 }
36339 }
36340 }
36341
36342 TEST(QS8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, n_div_8) {
36343 TEST_REQUIRES_X86_AVX2;
36344 for (uint32_t n = 16; n <= 24; n += 8) {
36345 for (size_t k = 1; k <= 40; k += 9) {
36346 GemmMicrokernelTester()
36347 .extended_weights(true)
36348 .mr(1)
36349 .nr(8)
36350 .kr(8)
36351 .sr(1)
36352 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080036353 .n(n)
Marat Dukhan0b043742021-06-02 18:29:11 -070036354 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080036355 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0b043742021-06-02 18:29:11 -070036356 }
36357 }
36358 }
36359
36360 TEST(QS8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, n_div_8_strided_cn) {
36361 TEST_REQUIRES_X86_AVX2;
36362 for (uint32_t n = 16; n <= 24; n += 8) {
36363 for (size_t k = 1; k <= 40; k += 9) {
36364 GemmMicrokernelTester()
36365 .extended_weights(true)
36366 .mr(1)
36367 .nr(8)
36368 .kr(8)
36369 .sr(1)
36370 .m(1)
36371 .n(n)
36372 .k(k)
36373 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080036374 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0b043742021-06-02 18:29:11 -070036375 }
36376 }
36377 }
36378
36379 TEST(QS8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, n_div_8_strided_a) {
36380 TEST_REQUIRES_X86_AVX2;
36381 for (uint32_t n = 16; n <= 24; n += 8) {
36382 for (size_t k = 1; k <= 40; k += 9) {
36383 GemmMicrokernelTester()
36384 .extended_weights(true)
36385 .mr(1)
36386 .nr(8)
36387 .kr(8)
36388 .sr(1)
36389 .m(1)
36390 .n(n)
36391 .k(k)
36392 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080036393 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0b043742021-06-02 18:29:11 -070036394 }
36395 }
36396 }
36397
36398 TEST(QS8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, n_div_8_subtile) {
36399 TEST_REQUIRES_X86_AVX2;
36400 for (uint32_t n = 16; n <= 24; n += 8) {
36401 for (size_t k = 1; k <= 40; k += 9) {
36402 for (uint32_t m = 1; m <= 1; m++) {
36403 GemmMicrokernelTester()
36404 .extended_weights(true)
36405 .mr(1)
36406 .nr(8)
36407 .kr(8)
36408 .sr(1)
36409 .m(m)
36410 .n(n)
36411 .k(k)
36412 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036413 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0b043742021-06-02 18:29:11 -070036414 }
36415 }
36416 }
36417 }
36418
36419 TEST(QS8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, strided_cm_subtile) {
36420 TEST_REQUIRES_X86_AVX2;
36421 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080036422 for (uint32_t n = 1; n <= 8; n++) {
36423 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0b043742021-06-02 18:29:11 -070036424 GemmMicrokernelTester()
36425 .extended_weights(true)
36426 .mr(1)
36427 .nr(8)
36428 .kr(8)
36429 .sr(1)
36430 .m(m)
36431 .n(n)
36432 .k(k)
36433 .cm_stride(11)
36434 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036435 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0b043742021-06-02 18:29:11 -070036436 }
36437 }
36438 }
36439 }
36440
36441 TEST(QS8_GEMM_XW_MINMAX_FP32_1X8C8__AVX2, strided_cm) {
36442 TEST_REQUIRES_X86_AVX2;
36443 GemmMicrokernelTester()
36444 .extended_weights(true)
36445 .mr(1)
36446 .nr(8)
36447 .kr(8)
36448 .sr(1)
36449 .m(1)
36450 .n(8)
36451 .k(8)
36452 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080036453 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
Marat Dukhan0b043742021-06-02 18:29:11 -070036454 }
36455#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
36456
36457
36458#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan71855ee2021-05-25 19:05:06 -070036459 TEST(QS8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8) {
36460 TEST_REQUIRES_X86_AVX512SKX;
36461 GemmMicrokernelTester()
36462 .mr(1)
36463 .nr(16)
36464 .kr(8)
36465 .sr(1)
36466 .m(1)
36467 .n(16)
36468 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080036469 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036470 }
36471
36472 TEST(QS8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, strided_cn) {
36473 TEST_REQUIRES_X86_AVX512SKX;
36474 GemmMicrokernelTester()
36475 .mr(1)
36476 .nr(16)
36477 .kr(8)
36478 .sr(1)
36479 .m(1)
36480 .n(16)
36481 .k(8)
36482 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080036483 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036484 }
36485
36486 TEST(QS8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_strided_a) {
36487 TEST_REQUIRES_X86_AVX512SKX;
36488 GemmMicrokernelTester()
36489 .mr(1)
36490 .nr(16)
36491 .kr(8)
36492 .sr(1)
36493 .m(1)
36494 .n(16)
36495 .k(8)
36496 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080036497 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036498 }
36499
36500 TEST(QS8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_subtile) {
36501 TEST_REQUIRES_X86_AVX512SKX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080036502 for (uint32_t n = 1; n <= 16; n++) {
36503 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan71855ee2021-05-25 19:05:06 -070036504 GemmMicrokernelTester()
36505 .mr(1)
36506 .nr(16)
36507 .kr(8)
36508 .sr(1)
36509 .m(m)
36510 .n(n)
36511 .k(8)
36512 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036513 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036514 }
36515 }
36516 }
36517
36518 TEST(QS8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_subtile_m) {
36519 TEST_REQUIRES_X86_AVX512SKX;
36520 for (uint32_t m = 1; m <= 1; m++) {
36521 GemmMicrokernelTester()
36522 .mr(1)
36523 .nr(16)
36524 .kr(8)
36525 .sr(1)
36526 .m(m)
36527 .n(16)
36528 .k(8)
36529 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036530 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036531 }
36532 }
36533
36534 TEST(QS8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_subtile_n) {
36535 TEST_REQUIRES_X86_AVX512SKX;
36536 for (uint32_t n = 1; n <= 16; n++) {
36537 GemmMicrokernelTester()
36538 .mr(1)
36539 .nr(16)
36540 .kr(8)
36541 .sr(1)
36542 .m(1)
36543 .n(n)
36544 .k(8)
36545 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036546 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036547 }
36548 }
36549
36550 TEST(QS8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_lt_8) {
36551 TEST_REQUIRES_X86_AVX512SKX;
36552 for (size_t k = 1; k < 8; k++) {
36553 GemmMicrokernelTester()
36554 .mr(1)
36555 .nr(16)
36556 .kr(8)
36557 .sr(1)
36558 .m(1)
36559 .n(16)
36560 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080036561 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036562 }
36563 }
36564
36565 TEST(QS8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_lt_8_strided_a) {
36566 TEST_REQUIRES_X86_AVX512SKX;
36567 for (size_t k = 1; k < 8; k++) {
36568 GemmMicrokernelTester()
36569 .mr(1)
36570 .nr(16)
36571 .kr(8)
36572 .sr(1)
36573 .m(1)
36574 .n(16)
36575 .k(k)
36576 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080036577 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036578 }
36579 }
36580
36581 TEST(QS8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_lt_8_subtile) {
36582 TEST_REQUIRES_X86_AVX512SKX;
36583 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080036584 for (uint32_t n = 1; n <= 16; n++) {
36585 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan71855ee2021-05-25 19:05:06 -070036586 GemmMicrokernelTester()
36587 .mr(1)
36588 .nr(16)
36589 .kr(8)
36590 .sr(1)
36591 .m(m)
36592 .n(n)
36593 .k(k)
36594 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036595 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036596 }
36597 }
36598 }
36599 }
36600
36601 TEST(QS8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_gt_8) {
36602 TEST_REQUIRES_X86_AVX512SKX;
36603 for (size_t k = 9; k < 16; k++) {
36604 GemmMicrokernelTester()
36605 .mr(1)
36606 .nr(16)
36607 .kr(8)
36608 .sr(1)
36609 .m(1)
36610 .n(16)
36611 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080036612 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036613 }
36614 }
36615
36616 TEST(QS8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_gt_8_strided_a) {
36617 TEST_REQUIRES_X86_AVX512SKX;
36618 for (size_t k = 9; k < 16; k++) {
36619 GemmMicrokernelTester()
36620 .mr(1)
36621 .nr(16)
36622 .kr(8)
36623 .sr(1)
36624 .m(1)
36625 .n(16)
36626 .k(k)
36627 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080036628 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036629 }
36630 }
36631
36632 TEST(QS8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_gt_8_subtile) {
36633 TEST_REQUIRES_X86_AVX512SKX;
36634 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080036635 for (uint32_t n = 1; n <= 16; n++) {
36636 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan71855ee2021-05-25 19:05:06 -070036637 GemmMicrokernelTester()
36638 .mr(1)
36639 .nr(16)
36640 .kr(8)
36641 .sr(1)
36642 .m(m)
36643 .n(n)
36644 .k(k)
36645 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036646 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036647 }
36648 }
36649 }
36650 }
36651
36652 TEST(QS8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_div_8) {
36653 TEST_REQUIRES_X86_AVX512SKX;
36654 for (size_t k = 16; k <= 80; k += 8) {
36655 GemmMicrokernelTester()
36656 .mr(1)
36657 .nr(16)
36658 .kr(8)
36659 .sr(1)
36660 .m(1)
36661 .n(16)
36662 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080036663 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036664 }
36665 }
36666
36667 TEST(QS8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_div_8_strided_a) {
36668 TEST_REQUIRES_X86_AVX512SKX;
36669 for (size_t k = 16; k <= 80; k += 8) {
36670 GemmMicrokernelTester()
36671 .mr(1)
36672 .nr(16)
36673 .kr(8)
36674 .sr(1)
36675 .m(1)
36676 .n(16)
36677 .k(k)
36678 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080036679 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036680 }
36681 }
36682
36683 TEST(QS8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_div_8_subtile) {
36684 TEST_REQUIRES_X86_AVX512SKX;
36685 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080036686 for (uint32_t n = 1; n <= 16; n++) {
36687 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan71855ee2021-05-25 19:05:06 -070036688 GemmMicrokernelTester()
36689 .mr(1)
36690 .nr(16)
36691 .kr(8)
36692 .sr(1)
36693 .m(m)
36694 .n(n)
36695 .k(k)
36696 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036697 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036698 }
36699 }
36700 }
36701 }
36702
36703 TEST(QS8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16) {
36704 TEST_REQUIRES_X86_AVX512SKX;
36705 for (uint32_t n = 17; n < 32; n++) {
36706 for (size_t k = 1; k <= 40; k += 9) {
36707 GemmMicrokernelTester()
36708 .mr(1)
36709 .nr(16)
36710 .kr(8)
36711 .sr(1)
36712 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080036713 .n(n)
Marat Dukhan71855ee2021-05-25 19:05:06 -070036714 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080036715 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036716 }
36717 }
36718 }
36719
36720 TEST(QS8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16_strided_cn) {
36721 TEST_REQUIRES_X86_AVX512SKX;
36722 for (uint32_t n = 17; n < 32; n++) {
36723 for (size_t k = 1; k <= 40; k += 9) {
36724 GemmMicrokernelTester()
36725 .mr(1)
36726 .nr(16)
36727 .kr(8)
36728 .sr(1)
36729 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080036730 .n(n)
Marat Dukhan71855ee2021-05-25 19:05:06 -070036731 .k(k)
36732 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080036733 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036734 }
36735 }
36736 }
36737
36738 TEST(QS8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16_strided_a) {
36739 TEST_REQUIRES_X86_AVX512SKX;
36740 for (uint32_t n = 17; n < 32; n++) {
36741 for (size_t k = 1; k <= 40; k += 9) {
36742 GemmMicrokernelTester()
36743 .mr(1)
36744 .nr(16)
36745 .kr(8)
36746 .sr(1)
36747 .m(1)
36748 .n(n)
36749 .k(k)
36750 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080036751 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036752 }
36753 }
36754 }
36755
36756 TEST(QS8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16_subtile) {
36757 TEST_REQUIRES_X86_AVX512SKX;
36758 for (uint32_t n = 17; n < 32; n++) {
36759 for (size_t k = 1; k <= 40; k += 9) {
36760 for (uint32_t m = 1; m <= 1; m++) {
36761 GemmMicrokernelTester()
36762 .mr(1)
36763 .nr(16)
36764 .kr(8)
36765 .sr(1)
36766 .m(m)
36767 .n(n)
36768 .k(k)
36769 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036770 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036771 }
36772 }
36773 }
36774 }
36775
36776 TEST(QS8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16) {
36777 TEST_REQUIRES_X86_AVX512SKX;
36778 for (uint32_t n = 32; n <= 48; n += 16) {
36779 for (size_t k = 1; k <= 40; k += 9) {
36780 GemmMicrokernelTester()
36781 .mr(1)
36782 .nr(16)
36783 .kr(8)
36784 .sr(1)
36785 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080036786 .n(n)
Marat Dukhan71855ee2021-05-25 19:05:06 -070036787 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080036788 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036789 }
36790 }
36791 }
36792
36793 TEST(QS8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16_strided_cn) {
36794 TEST_REQUIRES_X86_AVX512SKX;
36795 for (uint32_t n = 32; n <= 48; n += 16) {
36796 for (size_t k = 1; k <= 40; k += 9) {
36797 GemmMicrokernelTester()
36798 .mr(1)
36799 .nr(16)
36800 .kr(8)
36801 .sr(1)
36802 .m(1)
36803 .n(n)
36804 .k(k)
36805 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080036806 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036807 }
36808 }
36809 }
36810
36811 TEST(QS8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16_strided_a) {
36812 TEST_REQUIRES_X86_AVX512SKX;
36813 for (uint32_t n = 32; n <= 48; n += 16) {
36814 for (size_t k = 1; k <= 40; k += 9) {
36815 GemmMicrokernelTester()
36816 .mr(1)
36817 .nr(16)
36818 .kr(8)
36819 .sr(1)
36820 .m(1)
36821 .n(n)
36822 .k(k)
36823 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080036824 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036825 }
36826 }
36827 }
36828
36829 TEST(QS8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16_subtile) {
36830 TEST_REQUIRES_X86_AVX512SKX;
36831 for (uint32_t n = 32; n <= 48; n += 16) {
36832 for (size_t k = 1; k <= 40; k += 9) {
36833 for (uint32_t m = 1; m <= 1; m++) {
36834 GemmMicrokernelTester()
36835 .mr(1)
36836 .nr(16)
36837 .kr(8)
36838 .sr(1)
36839 .m(m)
36840 .n(n)
36841 .k(k)
36842 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036843 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036844 }
36845 }
36846 }
36847 }
36848
36849 TEST(QS8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, strided_cm_subtile) {
36850 TEST_REQUIRES_X86_AVX512SKX;
36851 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080036852 for (uint32_t n = 1; n <= 16; n++) {
36853 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan71855ee2021-05-25 19:05:06 -070036854 GemmMicrokernelTester()
36855 .mr(1)
36856 .nr(16)
36857 .kr(8)
36858 .sr(1)
36859 .m(m)
36860 .n(n)
36861 .k(k)
36862 .cm_stride(19)
36863 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036864 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036865 }
36866 }
36867 }
36868 }
36869
36870 TEST(QS8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, qmin) {
36871 TEST_REQUIRES_X86_AVX512SKX;
36872 GemmMicrokernelTester()
36873 .mr(1)
36874 .nr(16)
36875 .kr(8)
36876 .sr(1)
36877 .m(1)
36878 .n(16)
36879 .k(8)
36880 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080036881 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036882 }
36883
36884 TEST(QS8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, qmax) {
36885 TEST_REQUIRES_X86_AVX512SKX;
36886 GemmMicrokernelTester()
36887 .mr(1)
36888 .nr(16)
36889 .kr(8)
36890 .sr(1)
36891 .m(1)
36892 .n(16)
36893 .k(8)
36894 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080036895 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036896 }
36897
36898 TEST(QS8_GEMM_MINMAX_FP32_1X16C8__AVX512SKX, strided_cm) {
36899 TEST_REQUIRES_X86_AVX512SKX;
36900 GemmMicrokernelTester()
36901 .mr(1)
36902 .nr(16)
36903 .kr(8)
36904 .sr(1)
36905 .m(1)
36906 .n(16)
36907 .k(8)
36908 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080036909 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036910 }
36911#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
36912
36913
36914#if XNN_ARCH_X86 || XNN_ARCH_X86_64
36915 TEST(QS8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8) {
36916 TEST_REQUIRES_X86_AVX512SKX;
36917 GemmMicrokernelTester()
36918 .mr(2)
36919 .nr(16)
36920 .kr(8)
36921 .sr(1)
36922 .m(2)
36923 .n(16)
36924 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080036925 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036926 }
36927
36928 TEST(QS8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cn) {
36929 TEST_REQUIRES_X86_AVX512SKX;
36930 GemmMicrokernelTester()
36931 .mr(2)
36932 .nr(16)
36933 .kr(8)
36934 .sr(1)
36935 .m(2)
36936 .n(16)
36937 .k(8)
36938 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080036939 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036940 }
36941
36942 TEST(QS8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_strided_a) {
36943 TEST_REQUIRES_X86_AVX512SKX;
36944 GemmMicrokernelTester()
36945 .mr(2)
36946 .nr(16)
36947 .kr(8)
36948 .sr(1)
36949 .m(2)
36950 .n(16)
36951 .k(8)
36952 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080036953 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036954 }
36955
36956 TEST(QS8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile) {
36957 TEST_REQUIRES_X86_AVX512SKX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080036958 for (uint32_t n = 1; n <= 16; n++) {
36959 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan71855ee2021-05-25 19:05:06 -070036960 GemmMicrokernelTester()
36961 .mr(2)
36962 .nr(16)
36963 .kr(8)
36964 .sr(1)
36965 .m(m)
36966 .n(n)
36967 .k(8)
36968 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036969 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036970 }
36971 }
36972 }
36973
36974 TEST(QS8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile_m) {
36975 TEST_REQUIRES_X86_AVX512SKX;
36976 for (uint32_t m = 1; m <= 2; m++) {
36977 GemmMicrokernelTester()
36978 .mr(2)
36979 .nr(16)
36980 .kr(8)
36981 .sr(1)
36982 .m(m)
36983 .n(16)
36984 .k(8)
36985 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080036986 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070036987 }
36988 }
36989
36990 TEST(QS8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile_n) {
36991 TEST_REQUIRES_X86_AVX512SKX;
36992 for (uint32_t n = 1; n <= 16; n++) {
36993 GemmMicrokernelTester()
36994 .mr(2)
36995 .nr(16)
36996 .kr(8)
36997 .sr(1)
36998 .m(2)
36999 .n(n)
37000 .k(8)
37001 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037002 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037003 }
37004 }
37005
37006 TEST(QS8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_lt_8) {
37007 TEST_REQUIRES_X86_AVX512SKX;
37008 for (size_t k = 1; k < 8; k++) {
37009 GemmMicrokernelTester()
37010 .mr(2)
37011 .nr(16)
37012 .kr(8)
37013 .sr(1)
37014 .m(2)
37015 .n(16)
37016 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080037017 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037018 }
37019 }
37020
37021 TEST(QS8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_lt_8_strided_a) {
37022 TEST_REQUIRES_X86_AVX512SKX;
37023 for (size_t k = 1; k < 8; k++) {
37024 GemmMicrokernelTester()
37025 .mr(2)
37026 .nr(16)
37027 .kr(8)
37028 .sr(1)
37029 .m(2)
37030 .n(16)
37031 .k(k)
37032 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080037033 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037034 }
37035 }
37036
37037 TEST(QS8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_lt_8_subtile) {
37038 TEST_REQUIRES_X86_AVX512SKX;
37039 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080037040 for (uint32_t n = 1; n <= 16; n++) {
37041 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan71855ee2021-05-25 19:05:06 -070037042 GemmMicrokernelTester()
37043 .mr(2)
37044 .nr(16)
37045 .kr(8)
37046 .sr(1)
37047 .m(m)
37048 .n(n)
37049 .k(k)
37050 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037051 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037052 }
37053 }
37054 }
37055 }
37056
37057 TEST(QS8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_gt_8) {
37058 TEST_REQUIRES_X86_AVX512SKX;
37059 for (size_t k = 9; k < 16; k++) {
37060 GemmMicrokernelTester()
37061 .mr(2)
37062 .nr(16)
37063 .kr(8)
37064 .sr(1)
37065 .m(2)
37066 .n(16)
37067 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080037068 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037069 }
37070 }
37071
37072 TEST(QS8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_gt_8_strided_a) {
37073 TEST_REQUIRES_X86_AVX512SKX;
37074 for (size_t k = 9; k < 16; k++) {
37075 GemmMicrokernelTester()
37076 .mr(2)
37077 .nr(16)
37078 .kr(8)
37079 .sr(1)
37080 .m(2)
37081 .n(16)
37082 .k(k)
37083 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080037084 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037085 }
37086 }
37087
37088 TEST(QS8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_gt_8_subtile) {
37089 TEST_REQUIRES_X86_AVX512SKX;
37090 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080037091 for (uint32_t n = 1; n <= 16; n++) {
37092 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan71855ee2021-05-25 19:05:06 -070037093 GemmMicrokernelTester()
37094 .mr(2)
37095 .nr(16)
37096 .kr(8)
37097 .sr(1)
37098 .m(m)
37099 .n(n)
37100 .k(k)
37101 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037102 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037103 }
37104 }
37105 }
37106 }
37107
37108 TEST(QS8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_div_8) {
37109 TEST_REQUIRES_X86_AVX512SKX;
37110 for (size_t k = 16; k <= 80; k += 8) {
37111 GemmMicrokernelTester()
37112 .mr(2)
37113 .nr(16)
37114 .kr(8)
37115 .sr(1)
37116 .m(2)
37117 .n(16)
37118 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080037119 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037120 }
37121 }
37122
37123 TEST(QS8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_div_8_strided_a) {
37124 TEST_REQUIRES_X86_AVX512SKX;
37125 for (size_t k = 16; k <= 80; k += 8) {
37126 GemmMicrokernelTester()
37127 .mr(2)
37128 .nr(16)
37129 .kr(8)
37130 .sr(1)
37131 .m(2)
37132 .n(16)
37133 .k(k)
37134 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080037135 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037136 }
37137 }
37138
37139 TEST(QS8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_div_8_subtile) {
37140 TEST_REQUIRES_X86_AVX512SKX;
37141 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080037142 for (uint32_t n = 1; n <= 16; n++) {
37143 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan71855ee2021-05-25 19:05:06 -070037144 GemmMicrokernelTester()
37145 .mr(2)
37146 .nr(16)
37147 .kr(8)
37148 .sr(1)
37149 .m(m)
37150 .n(n)
37151 .k(k)
37152 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037153 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037154 }
37155 }
37156 }
37157 }
37158
37159 TEST(QS8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16) {
37160 TEST_REQUIRES_X86_AVX512SKX;
37161 for (uint32_t n = 17; n < 32; n++) {
37162 for (size_t k = 1; k <= 40; k += 9) {
37163 GemmMicrokernelTester()
37164 .mr(2)
37165 .nr(16)
37166 .kr(8)
37167 .sr(1)
37168 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080037169 .n(n)
Marat Dukhan71855ee2021-05-25 19:05:06 -070037170 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080037171 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037172 }
37173 }
37174 }
37175
37176 TEST(QS8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_strided_cn) {
37177 TEST_REQUIRES_X86_AVX512SKX;
37178 for (uint32_t n = 17; n < 32; n++) {
37179 for (size_t k = 1; k <= 40; k += 9) {
37180 GemmMicrokernelTester()
37181 .mr(2)
37182 .nr(16)
37183 .kr(8)
37184 .sr(1)
37185 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080037186 .n(n)
Marat Dukhan71855ee2021-05-25 19:05:06 -070037187 .k(k)
37188 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080037189 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037190 }
37191 }
37192 }
37193
37194 TEST(QS8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_strided_a) {
37195 TEST_REQUIRES_X86_AVX512SKX;
37196 for (uint32_t n = 17; n < 32; n++) {
37197 for (size_t k = 1; k <= 40; k += 9) {
37198 GemmMicrokernelTester()
37199 .mr(2)
37200 .nr(16)
37201 .kr(8)
37202 .sr(1)
37203 .m(2)
37204 .n(n)
37205 .k(k)
37206 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080037207 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037208 }
37209 }
37210 }
37211
37212 TEST(QS8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_subtile) {
37213 TEST_REQUIRES_X86_AVX512SKX;
37214 for (uint32_t n = 17; n < 32; n++) {
37215 for (size_t k = 1; k <= 40; k += 9) {
37216 for (uint32_t m = 1; m <= 2; m++) {
37217 GemmMicrokernelTester()
37218 .mr(2)
37219 .nr(16)
37220 .kr(8)
37221 .sr(1)
37222 .m(m)
37223 .n(n)
37224 .k(k)
37225 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037226 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037227 }
37228 }
37229 }
37230 }
37231
37232 TEST(QS8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16) {
37233 TEST_REQUIRES_X86_AVX512SKX;
37234 for (uint32_t n = 32; n <= 48; n += 16) {
37235 for (size_t k = 1; k <= 40; k += 9) {
37236 GemmMicrokernelTester()
37237 .mr(2)
37238 .nr(16)
37239 .kr(8)
37240 .sr(1)
37241 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080037242 .n(n)
Marat Dukhan71855ee2021-05-25 19:05:06 -070037243 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080037244 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037245 }
37246 }
37247 }
37248
37249 TEST(QS8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_strided_cn) {
37250 TEST_REQUIRES_X86_AVX512SKX;
37251 for (uint32_t n = 32; n <= 48; n += 16) {
37252 for (size_t k = 1; k <= 40; k += 9) {
37253 GemmMicrokernelTester()
37254 .mr(2)
37255 .nr(16)
37256 .kr(8)
37257 .sr(1)
37258 .m(2)
37259 .n(n)
37260 .k(k)
37261 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080037262 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037263 }
37264 }
37265 }
37266
37267 TEST(QS8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_strided_a) {
37268 TEST_REQUIRES_X86_AVX512SKX;
37269 for (uint32_t n = 32; n <= 48; n += 16) {
37270 for (size_t k = 1; k <= 40; k += 9) {
37271 GemmMicrokernelTester()
37272 .mr(2)
37273 .nr(16)
37274 .kr(8)
37275 .sr(1)
37276 .m(2)
37277 .n(n)
37278 .k(k)
37279 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080037280 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037281 }
37282 }
37283 }
37284
37285 TEST(QS8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_subtile) {
37286 TEST_REQUIRES_X86_AVX512SKX;
37287 for (uint32_t n = 32; n <= 48; n += 16) {
37288 for (size_t k = 1; k <= 40; k += 9) {
37289 for (uint32_t m = 1; m <= 2; m++) {
37290 GemmMicrokernelTester()
37291 .mr(2)
37292 .nr(16)
37293 .kr(8)
37294 .sr(1)
37295 .m(m)
37296 .n(n)
37297 .k(k)
37298 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037299 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037300 }
37301 }
37302 }
37303 }
37304
37305 TEST(QS8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cm_subtile) {
37306 TEST_REQUIRES_X86_AVX512SKX;
37307 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080037308 for (uint32_t n = 1; n <= 16; n++) {
37309 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan71855ee2021-05-25 19:05:06 -070037310 GemmMicrokernelTester()
37311 .mr(2)
37312 .nr(16)
37313 .kr(8)
37314 .sr(1)
37315 .m(m)
37316 .n(n)
37317 .k(k)
37318 .cm_stride(19)
37319 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037320 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037321 }
37322 }
37323 }
37324 }
37325
37326 TEST(QS8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, qmin) {
37327 TEST_REQUIRES_X86_AVX512SKX;
37328 GemmMicrokernelTester()
37329 .mr(2)
37330 .nr(16)
37331 .kr(8)
37332 .sr(1)
37333 .m(2)
37334 .n(16)
37335 .k(8)
37336 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080037337 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037338 }
37339
37340 TEST(QS8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, qmax) {
37341 TEST_REQUIRES_X86_AVX512SKX;
37342 GemmMicrokernelTester()
37343 .mr(2)
37344 .nr(16)
37345 .kr(8)
37346 .sr(1)
37347 .m(2)
37348 .n(16)
37349 .k(8)
37350 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080037351 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037352 }
37353
37354 TEST(QS8_GEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cm) {
37355 TEST_REQUIRES_X86_AVX512SKX;
37356 GemmMicrokernelTester()
37357 .mr(2)
37358 .nr(16)
37359 .kr(8)
37360 .sr(1)
37361 .m(2)
37362 .n(16)
37363 .k(8)
37364 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080037365 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037366 }
37367#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
37368
37369
37370#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan71855ee2021-05-25 19:05:06 -070037371 TEST(QS8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8) {
37372 TEST_REQUIRES_X86_AVX512SKX;
37373 GemmMicrokernelTester()
37374 .mr(4)
37375 .nr(16)
37376 .kr(8)
37377 .sr(1)
37378 .m(4)
37379 .n(16)
37380 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080037381 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037382 }
37383
37384 TEST(QS8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, strided_cn) {
37385 TEST_REQUIRES_X86_AVX512SKX;
37386 GemmMicrokernelTester()
37387 .mr(4)
37388 .nr(16)
37389 .kr(8)
37390 .sr(1)
37391 .m(4)
37392 .n(16)
37393 .k(8)
37394 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080037395 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037396 }
37397
37398 TEST(QS8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_strided_a) {
37399 TEST_REQUIRES_X86_AVX512SKX;
37400 GemmMicrokernelTester()
37401 .mr(4)
37402 .nr(16)
37403 .kr(8)
37404 .sr(1)
37405 .m(4)
37406 .n(16)
37407 .k(8)
37408 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080037409 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037410 }
37411
37412 TEST(QS8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_subtile) {
37413 TEST_REQUIRES_X86_AVX512SKX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080037414 for (uint32_t n = 1; n <= 16; n++) {
37415 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan71855ee2021-05-25 19:05:06 -070037416 GemmMicrokernelTester()
37417 .mr(4)
37418 .nr(16)
37419 .kr(8)
37420 .sr(1)
37421 .m(m)
37422 .n(n)
37423 .k(8)
37424 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037425 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037426 }
37427 }
37428 }
37429
37430 TEST(QS8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_subtile_m) {
37431 TEST_REQUIRES_X86_AVX512SKX;
37432 for (uint32_t m = 1; m <= 4; m++) {
37433 GemmMicrokernelTester()
37434 .mr(4)
37435 .nr(16)
37436 .kr(8)
37437 .sr(1)
37438 .m(m)
37439 .n(16)
37440 .k(8)
37441 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037442 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037443 }
37444 }
37445
37446 TEST(QS8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_subtile_n) {
37447 TEST_REQUIRES_X86_AVX512SKX;
37448 for (uint32_t n = 1; n <= 16; n++) {
37449 GemmMicrokernelTester()
37450 .mr(4)
37451 .nr(16)
37452 .kr(8)
37453 .sr(1)
37454 .m(4)
37455 .n(n)
37456 .k(8)
37457 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037458 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037459 }
37460 }
37461
37462 TEST(QS8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_lt_8) {
37463 TEST_REQUIRES_X86_AVX512SKX;
37464 for (size_t k = 1; k < 8; k++) {
37465 GemmMicrokernelTester()
37466 .mr(4)
37467 .nr(16)
37468 .kr(8)
37469 .sr(1)
37470 .m(4)
37471 .n(16)
37472 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080037473 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037474 }
37475 }
37476
37477 TEST(QS8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_lt_8_strided_a) {
37478 TEST_REQUIRES_X86_AVX512SKX;
37479 for (size_t k = 1; k < 8; k++) {
37480 GemmMicrokernelTester()
37481 .mr(4)
37482 .nr(16)
37483 .kr(8)
37484 .sr(1)
37485 .m(4)
37486 .n(16)
37487 .k(k)
37488 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080037489 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037490 }
37491 }
37492
37493 TEST(QS8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_lt_8_subtile) {
37494 TEST_REQUIRES_X86_AVX512SKX;
37495 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080037496 for (uint32_t n = 1; n <= 16; n++) {
37497 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan71855ee2021-05-25 19:05:06 -070037498 GemmMicrokernelTester()
37499 .mr(4)
37500 .nr(16)
37501 .kr(8)
37502 .sr(1)
37503 .m(m)
37504 .n(n)
37505 .k(k)
37506 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037507 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037508 }
37509 }
37510 }
37511 }
37512
37513 TEST(QS8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_gt_8) {
37514 TEST_REQUIRES_X86_AVX512SKX;
37515 for (size_t k = 9; k < 16; k++) {
37516 GemmMicrokernelTester()
37517 .mr(4)
37518 .nr(16)
37519 .kr(8)
37520 .sr(1)
37521 .m(4)
37522 .n(16)
37523 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080037524 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037525 }
37526 }
37527
37528 TEST(QS8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_gt_8_strided_a) {
37529 TEST_REQUIRES_X86_AVX512SKX;
37530 for (size_t k = 9; k < 16; k++) {
37531 GemmMicrokernelTester()
37532 .mr(4)
37533 .nr(16)
37534 .kr(8)
37535 .sr(1)
37536 .m(4)
37537 .n(16)
37538 .k(k)
37539 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080037540 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037541 }
37542 }
37543
37544 TEST(QS8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_gt_8_subtile) {
37545 TEST_REQUIRES_X86_AVX512SKX;
37546 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080037547 for (uint32_t n = 1; n <= 16; n++) {
37548 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan71855ee2021-05-25 19:05:06 -070037549 GemmMicrokernelTester()
37550 .mr(4)
37551 .nr(16)
37552 .kr(8)
37553 .sr(1)
37554 .m(m)
37555 .n(n)
37556 .k(k)
37557 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037558 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037559 }
37560 }
37561 }
37562 }
37563
37564 TEST(QS8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_div_8) {
37565 TEST_REQUIRES_X86_AVX512SKX;
37566 for (size_t k = 16; k <= 80; k += 8) {
37567 GemmMicrokernelTester()
37568 .mr(4)
37569 .nr(16)
37570 .kr(8)
37571 .sr(1)
37572 .m(4)
37573 .n(16)
37574 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080037575 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037576 }
37577 }
37578
37579 TEST(QS8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_div_8_strided_a) {
37580 TEST_REQUIRES_X86_AVX512SKX;
37581 for (size_t k = 16; k <= 80; k += 8) {
37582 GemmMicrokernelTester()
37583 .mr(4)
37584 .nr(16)
37585 .kr(8)
37586 .sr(1)
37587 .m(4)
37588 .n(16)
37589 .k(k)
37590 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080037591 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037592 }
37593 }
37594
37595 TEST(QS8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_div_8_subtile) {
37596 TEST_REQUIRES_X86_AVX512SKX;
37597 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080037598 for (uint32_t n = 1; n <= 16; n++) {
37599 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan71855ee2021-05-25 19:05:06 -070037600 GemmMicrokernelTester()
37601 .mr(4)
37602 .nr(16)
37603 .kr(8)
37604 .sr(1)
37605 .m(m)
37606 .n(n)
37607 .k(k)
37608 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037609 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037610 }
37611 }
37612 }
37613 }
37614
37615 TEST(QS8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16) {
37616 TEST_REQUIRES_X86_AVX512SKX;
37617 for (uint32_t n = 17; n < 32; n++) {
37618 for (size_t k = 1; k <= 40; k += 9) {
37619 GemmMicrokernelTester()
37620 .mr(4)
37621 .nr(16)
37622 .kr(8)
37623 .sr(1)
37624 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080037625 .n(n)
Marat Dukhan71855ee2021-05-25 19:05:06 -070037626 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080037627 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037628 }
37629 }
37630 }
37631
37632 TEST(QS8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16_strided_cn) {
37633 TEST_REQUIRES_X86_AVX512SKX;
37634 for (uint32_t n = 17; n < 32; n++) {
37635 for (size_t k = 1; k <= 40; k += 9) {
37636 GemmMicrokernelTester()
37637 .mr(4)
37638 .nr(16)
37639 .kr(8)
37640 .sr(1)
37641 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080037642 .n(n)
Marat Dukhan71855ee2021-05-25 19:05:06 -070037643 .k(k)
37644 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080037645 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037646 }
37647 }
37648 }
37649
37650 TEST(QS8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16_strided_a) {
37651 TEST_REQUIRES_X86_AVX512SKX;
37652 for (uint32_t n = 17; n < 32; n++) {
37653 for (size_t k = 1; k <= 40; k += 9) {
37654 GemmMicrokernelTester()
37655 .mr(4)
37656 .nr(16)
37657 .kr(8)
37658 .sr(1)
37659 .m(4)
37660 .n(n)
37661 .k(k)
37662 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080037663 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037664 }
37665 }
37666 }
37667
37668 TEST(QS8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16_subtile) {
37669 TEST_REQUIRES_X86_AVX512SKX;
37670 for (uint32_t n = 17; n < 32; n++) {
37671 for (size_t k = 1; k <= 40; k += 9) {
37672 for (uint32_t m = 1; m <= 4; m++) {
37673 GemmMicrokernelTester()
37674 .mr(4)
37675 .nr(16)
37676 .kr(8)
37677 .sr(1)
37678 .m(m)
37679 .n(n)
37680 .k(k)
37681 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037682 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037683 }
37684 }
37685 }
37686 }
37687
37688 TEST(QS8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16) {
37689 TEST_REQUIRES_X86_AVX512SKX;
37690 for (uint32_t n = 32; n <= 48; n += 16) {
37691 for (size_t k = 1; k <= 40; k += 9) {
37692 GemmMicrokernelTester()
37693 .mr(4)
37694 .nr(16)
37695 .kr(8)
37696 .sr(1)
37697 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080037698 .n(n)
Marat Dukhan71855ee2021-05-25 19:05:06 -070037699 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080037700 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037701 }
37702 }
37703 }
37704
37705 TEST(QS8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16_strided_cn) {
37706 TEST_REQUIRES_X86_AVX512SKX;
37707 for (uint32_t n = 32; n <= 48; n += 16) {
37708 for (size_t k = 1; k <= 40; k += 9) {
37709 GemmMicrokernelTester()
37710 .mr(4)
37711 .nr(16)
37712 .kr(8)
37713 .sr(1)
37714 .m(4)
37715 .n(n)
37716 .k(k)
37717 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080037718 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037719 }
37720 }
37721 }
37722
37723 TEST(QS8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16_strided_a) {
37724 TEST_REQUIRES_X86_AVX512SKX;
37725 for (uint32_t n = 32; n <= 48; n += 16) {
37726 for (size_t k = 1; k <= 40; k += 9) {
37727 GemmMicrokernelTester()
37728 .mr(4)
37729 .nr(16)
37730 .kr(8)
37731 .sr(1)
37732 .m(4)
37733 .n(n)
37734 .k(k)
37735 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080037736 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037737 }
37738 }
37739 }
37740
37741 TEST(QS8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16_subtile) {
37742 TEST_REQUIRES_X86_AVX512SKX;
37743 for (uint32_t n = 32; n <= 48; n += 16) {
37744 for (size_t k = 1; k <= 40; k += 9) {
37745 for (uint32_t m = 1; m <= 4; m++) {
37746 GemmMicrokernelTester()
37747 .mr(4)
37748 .nr(16)
37749 .kr(8)
37750 .sr(1)
37751 .m(m)
37752 .n(n)
37753 .k(k)
37754 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037755 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037756 }
37757 }
37758 }
37759 }
37760
37761 TEST(QS8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, strided_cm_subtile) {
37762 TEST_REQUIRES_X86_AVX512SKX;
37763 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080037764 for (uint32_t n = 1; n <= 16; n++) {
37765 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan71855ee2021-05-25 19:05:06 -070037766 GemmMicrokernelTester()
37767 .mr(4)
37768 .nr(16)
37769 .kr(8)
37770 .sr(1)
37771 .m(m)
37772 .n(n)
37773 .k(k)
37774 .cm_stride(19)
37775 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037776 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037777 }
37778 }
37779 }
37780 }
37781
37782 TEST(QS8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, qmin) {
37783 TEST_REQUIRES_X86_AVX512SKX;
37784 GemmMicrokernelTester()
37785 .mr(4)
37786 .nr(16)
37787 .kr(8)
37788 .sr(1)
37789 .m(4)
37790 .n(16)
37791 .k(8)
37792 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080037793 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037794 }
37795
37796 TEST(QS8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, qmax) {
37797 TEST_REQUIRES_X86_AVX512SKX;
37798 GemmMicrokernelTester()
37799 .mr(4)
37800 .nr(16)
37801 .kr(8)
37802 .sr(1)
37803 .m(4)
37804 .n(16)
37805 .k(8)
37806 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080037807 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037808 }
37809
37810 TEST(QS8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, strided_cm) {
37811 TEST_REQUIRES_X86_AVX512SKX;
37812 GemmMicrokernelTester()
37813 .mr(4)
37814 .nr(16)
37815 .kr(8)
37816 .sr(1)
37817 .m(4)
37818 .n(16)
37819 .k(8)
37820 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080037821 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
Marat Dukhan71855ee2021-05-25 19:05:06 -070037822 }
37823#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan779b2532021-06-29 14:14:13 -070037824
37825
Marat Dukhan4c617792021-12-21 15:47:58 -080037826#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070037827 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
37828 GemmMicrokernelTester()
37829 .mr(1)
37830 .nr(4)
37831 .kr(2)
37832 .sr(1)
37833 .m(1)
37834 .n(4)
37835 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080037836 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070037837 }
37838
37839 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
37840 GemmMicrokernelTester()
37841 .mr(1)
37842 .nr(4)
37843 .kr(2)
37844 .sr(1)
37845 .m(1)
37846 .n(4)
37847 .k(8)
37848 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080037849 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070037850 }
37851
37852 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_strided_a) {
37853 GemmMicrokernelTester()
37854 .mr(1)
37855 .nr(4)
37856 .kr(2)
37857 .sr(1)
37858 .m(1)
37859 .n(4)
37860 .k(8)
37861 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080037862 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070037863 }
37864
37865 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080037866 for (uint32_t n = 1; n <= 4; n++) {
37867 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070037868 GemmMicrokernelTester()
37869 .mr(1)
37870 .nr(4)
37871 .kr(2)
37872 .sr(1)
37873 .m(m)
37874 .n(n)
37875 .k(8)
37876 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037877 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070037878 }
37879 }
37880 }
37881
37882 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
37883 for (uint32_t m = 1; m <= 1; m++) {
37884 GemmMicrokernelTester()
37885 .mr(1)
37886 .nr(4)
37887 .kr(2)
37888 .sr(1)
37889 .m(m)
37890 .n(4)
37891 .k(8)
37892 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037893 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070037894 }
37895 }
37896
37897 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
37898 for (uint32_t n = 1; n <= 4; n++) {
37899 GemmMicrokernelTester()
37900 .mr(1)
37901 .nr(4)
37902 .kr(2)
37903 .sr(1)
37904 .m(1)
37905 .n(n)
37906 .k(8)
37907 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037908 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070037909 }
37910 }
37911
37912 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
37913 for (size_t k = 1; k < 8; k++) {
37914 GemmMicrokernelTester()
37915 .mr(1)
37916 .nr(4)
37917 .kr(2)
37918 .sr(1)
37919 .m(1)
37920 .n(4)
37921 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080037922 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070037923 }
37924 }
37925
37926 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_strided_a) {
37927 for (size_t k = 1; k < 8; k++) {
37928 GemmMicrokernelTester()
37929 .mr(1)
37930 .nr(4)
37931 .kr(2)
37932 .sr(1)
37933 .m(1)
37934 .n(4)
37935 .k(k)
37936 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080037937 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070037938 }
37939 }
37940
37941 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
37942 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080037943 for (uint32_t n = 1; n <= 4; n++) {
37944 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070037945 GemmMicrokernelTester()
37946 .mr(1)
37947 .nr(4)
37948 .kr(2)
37949 .sr(1)
37950 .m(m)
37951 .n(n)
37952 .k(k)
37953 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080037954 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070037955 }
37956 }
37957 }
37958 }
37959
37960 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
37961 for (size_t k = 9; k < 16; k++) {
37962 GemmMicrokernelTester()
37963 .mr(1)
37964 .nr(4)
37965 .kr(2)
37966 .sr(1)
37967 .m(1)
37968 .n(4)
37969 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080037970 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070037971 }
37972 }
37973
37974 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_strided_a) {
37975 for (size_t k = 9; k < 16; k++) {
37976 GemmMicrokernelTester()
37977 .mr(1)
37978 .nr(4)
37979 .kr(2)
37980 .sr(1)
37981 .m(1)
37982 .n(4)
37983 .k(k)
37984 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080037985 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070037986 }
37987 }
37988
37989 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
37990 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080037991 for (uint32_t n = 1; n <= 4; n++) {
37992 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070037993 GemmMicrokernelTester()
37994 .mr(1)
37995 .nr(4)
37996 .kr(2)
37997 .sr(1)
37998 .m(m)
37999 .n(n)
38000 .k(k)
38001 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038002 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038003 }
38004 }
38005 }
38006 }
38007
38008 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
38009 for (size_t k = 16; k <= 80; k += 8) {
38010 GemmMicrokernelTester()
38011 .mr(1)
38012 .nr(4)
38013 .kr(2)
38014 .sr(1)
38015 .m(1)
38016 .n(4)
38017 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080038018 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038019 }
38020 }
38021
38022 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_strided_a) {
38023 for (size_t k = 16; k <= 80; k += 8) {
38024 GemmMicrokernelTester()
38025 .mr(1)
38026 .nr(4)
38027 .kr(2)
38028 .sr(1)
38029 .m(1)
38030 .n(4)
38031 .k(k)
38032 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080038033 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038034 }
38035 }
38036
38037 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
38038 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080038039 for (uint32_t n = 1; n <= 4; n++) {
38040 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038041 GemmMicrokernelTester()
38042 .mr(1)
38043 .nr(4)
38044 .kr(2)
38045 .sr(1)
38046 .m(m)
38047 .n(n)
38048 .k(k)
38049 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038050 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038051 }
38052 }
38053 }
38054 }
38055
38056 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
38057 for (uint32_t n = 5; n < 8; n++) {
38058 for (size_t k = 1; k <= 40; k += 9) {
38059 GemmMicrokernelTester()
38060 .mr(1)
38061 .nr(4)
38062 .kr(2)
38063 .sr(1)
38064 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080038065 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038066 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080038067 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038068 }
38069 }
38070 }
38071
38072 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
38073 for (uint32_t n = 5; n < 8; n++) {
38074 for (size_t k = 1; k <= 40; k += 9) {
38075 GemmMicrokernelTester()
38076 .mr(1)
38077 .nr(4)
38078 .kr(2)
38079 .sr(1)
38080 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080038081 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038082 .k(k)
38083 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080038084 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038085 }
38086 }
38087 }
38088
38089 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_a) {
38090 for (uint32_t n = 5; n < 8; n++) {
38091 for (size_t k = 1; k <= 40; k += 9) {
38092 GemmMicrokernelTester()
38093 .mr(1)
38094 .nr(4)
38095 .kr(2)
38096 .sr(1)
38097 .m(1)
38098 .n(n)
38099 .k(k)
38100 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080038101 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038102 }
38103 }
38104 }
38105
38106 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
38107 for (uint32_t n = 5; n < 8; n++) {
38108 for (size_t k = 1; k <= 40; k += 9) {
38109 for (uint32_t m = 1; m <= 1; m++) {
38110 GemmMicrokernelTester()
38111 .mr(1)
38112 .nr(4)
38113 .kr(2)
38114 .sr(1)
38115 .m(m)
38116 .n(n)
38117 .k(k)
38118 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038119 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038120 }
38121 }
38122 }
38123 }
38124
38125 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
38126 for (uint32_t n = 8; n <= 12; n += 4) {
38127 for (size_t k = 1; k <= 40; k += 9) {
38128 GemmMicrokernelTester()
38129 .mr(1)
38130 .nr(4)
38131 .kr(2)
38132 .sr(1)
38133 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080038134 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038135 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080038136 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038137 }
38138 }
38139 }
38140
38141 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
38142 for (uint32_t n = 8; n <= 12; n += 4) {
38143 for (size_t k = 1; k <= 40; k += 9) {
38144 GemmMicrokernelTester()
38145 .mr(1)
38146 .nr(4)
38147 .kr(2)
38148 .sr(1)
38149 .m(1)
38150 .n(n)
38151 .k(k)
38152 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080038153 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038154 }
38155 }
38156 }
38157
38158 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_a) {
38159 for (uint32_t n = 8; n <= 12; n += 4) {
38160 for (size_t k = 1; k <= 40; k += 9) {
38161 GemmMicrokernelTester()
38162 .mr(1)
38163 .nr(4)
38164 .kr(2)
38165 .sr(1)
38166 .m(1)
38167 .n(n)
38168 .k(k)
38169 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080038170 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038171 }
38172 }
38173 }
38174
38175 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
38176 for (uint32_t n = 8; n <= 12; n += 4) {
38177 for (size_t k = 1; k <= 40; k += 9) {
38178 for (uint32_t m = 1; m <= 1; m++) {
38179 GemmMicrokernelTester()
38180 .mr(1)
38181 .nr(4)
38182 .kr(2)
38183 .sr(1)
38184 .m(m)
38185 .n(n)
38186 .k(k)
38187 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038188 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038189 }
38190 }
38191 }
38192 }
38193
38194 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
38195 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080038196 for (uint32_t n = 1; n <= 4; n++) {
38197 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038198 GemmMicrokernelTester()
38199 .mr(1)
38200 .nr(4)
38201 .kr(2)
38202 .sr(1)
38203 .m(m)
38204 .n(n)
38205 .k(k)
38206 .cm_stride(7)
38207 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038208 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038209 }
38210 }
38211 }
38212 }
38213
38214 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
38215 GemmMicrokernelTester()
38216 .mr(1)
38217 .nr(4)
38218 .kr(2)
38219 .sr(1)
38220 .m(1)
38221 .n(4)
38222 .k(8)
38223 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080038224 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038225 }
38226
38227 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
38228 GemmMicrokernelTester()
38229 .mr(1)
38230 .nr(4)
38231 .kr(2)
38232 .sr(1)
38233 .m(1)
38234 .n(4)
38235 .k(8)
38236 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080038237 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038238 }
38239
38240 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
38241 GemmMicrokernelTester()
38242 .mr(1)
38243 .nr(4)
38244 .kr(2)
38245 .sr(1)
38246 .m(1)
38247 .n(4)
38248 .k(8)
38249 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080038250 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038251 }
Marat Dukhan4c617792021-12-21 15:47:58 -080038252#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038253
38254
Marat Dukhan4c617792021-12-21 15:47:58 -080038255#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038256 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
38257 GemmMicrokernelTester()
38258 .mr(3)
38259 .nr(4)
38260 .kr(2)
38261 .sr(1)
38262 .m(3)
38263 .n(4)
38264 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080038265 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038266 }
38267
38268 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
38269 GemmMicrokernelTester()
38270 .mr(3)
38271 .nr(4)
38272 .kr(2)
38273 .sr(1)
38274 .m(3)
38275 .n(4)
38276 .k(8)
38277 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080038278 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038279 }
38280
38281 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_strided_a) {
38282 GemmMicrokernelTester()
38283 .mr(3)
38284 .nr(4)
38285 .kr(2)
38286 .sr(1)
38287 .m(3)
38288 .n(4)
38289 .k(8)
38290 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080038291 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038292 }
38293
38294 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080038295 for (uint32_t n = 1; n <= 4; n++) {
38296 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038297 GemmMicrokernelTester()
38298 .mr(3)
38299 .nr(4)
38300 .kr(2)
38301 .sr(1)
38302 .m(m)
38303 .n(n)
38304 .k(8)
38305 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038306 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038307 }
38308 }
38309 }
38310
38311 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
38312 for (uint32_t m = 1; m <= 3; m++) {
38313 GemmMicrokernelTester()
38314 .mr(3)
38315 .nr(4)
38316 .kr(2)
38317 .sr(1)
38318 .m(m)
38319 .n(4)
38320 .k(8)
38321 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038322 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038323 }
38324 }
38325
38326 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
38327 for (uint32_t n = 1; n <= 4; n++) {
38328 GemmMicrokernelTester()
38329 .mr(3)
38330 .nr(4)
38331 .kr(2)
38332 .sr(1)
38333 .m(3)
38334 .n(n)
38335 .k(8)
38336 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038337 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038338 }
38339 }
38340
38341 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
38342 for (size_t k = 1; k < 8; k++) {
38343 GemmMicrokernelTester()
38344 .mr(3)
38345 .nr(4)
38346 .kr(2)
38347 .sr(1)
38348 .m(3)
38349 .n(4)
38350 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080038351 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038352 }
38353 }
38354
38355 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_strided_a) {
38356 for (size_t k = 1; k < 8; k++) {
38357 GemmMicrokernelTester()
38358 .mr(3)
38359 .nr(4)
38360 .kr(2)
38361 .sr(1)
38362 .m(3)
38363 .n(4)
38364 .k(k)
38365 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080038366 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038367 }
38368 }
38369
38370 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
38371 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080038372 for (uint32_t n = 1; n <= 4; n++) {
38373 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038374 GemmMicrokernelTester()
38375 .mr(3)
38376 .nr(4)
38377 .kr(2)
38378 .sr(1)
38379 .m(m)
38380 .n(n)
38381 .k(k)
38382 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038383 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038384 }
38385 }
38386 }
38387 }
38388
38389 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
38390 for (size_t k = 9; k < 16; k++) {
38391 GemmMicrokernelTester()
38392 .mr(3)
38393 .nr(4)
38394 .kr(2)
38395 .sr(1)
38396 .m(3)
38397 .n(4)
38398 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080038399 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038400 }
38401 }
38402
38403 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_strided_a) {
38404 for (size_t k = 9; k < 16; k++) {
38405 GemmMicrokernelTester()
38406 .mr(3)
38407 .nr(4)
38408 .kr(2)
38409 .sr(1)
38410 .m(3)
38411 .n(4)
38412 .k(k)
38413 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080038414 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038415 }
38416 }
38417
38418 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
38419 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080038420 for (uint32_t n = 1; n <= 4; n++) {
38421 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038422 GemmMicrokernelTester()
38423 .mr(3)
38424 .nr(4)
38425 .kr(2)
38426 .sr(1)
38427 .m(m)
38428 .n(n)
38429 .k(k)
38430 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038431 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038432 }
38433 }
38434 }
38435 }
38436
38437 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
38438 for (size_t k = 16; k <= 80; k += 8) {
38439 GemmMicrokernelTester()
38440 .mr(3)
38441 .nr(4)
38442 .kr(2)
38443 .sr(1)
38444 .m(3)
38445 .n(4)
38446 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080038447 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038448 }
38449 }
38450
38451 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_strided_a) {
38452 for (size_t k = 16; k <= 80; k += 8) {
38453 GemmMicrokernelTester()
38454 .mr(3)
38455 .nr(4)
38456 .kr(2)
38457 .sr(1)
38458 .m(3)
38459 .n(4)
38460 .k(k)
38461 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080038462 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038463 }
38464 }
38465
38466 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
38467 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080038468 for (uint32_t n = 1; n <= 4; n++) {
38469 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038470 GemmMicrokernelTester()
38471 .mr(3)
38472 .nr(4)
38473 .kr(2)
38474 .sr(1)
38475 .m(m)
38476 .n(n)
38477 .k(k)
38478 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038479 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038480 }
38481 }
38482 }
38483 }
38484
38485 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
38486 for (uint32_t n = 5; n < 8; n++) {
38487 for (size_t k = 1; k <= 40; k += 9) {
38488 GemmMicrokernelTester()
38489 .mr(3)
38490 .nr(4)
38491 .kr(2)
38492 .sr(1)
38493 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080038494 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038495 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080038496 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038497 }
38498 }
38499 }
38500
38501 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
38502 for (uint32_t n = 5; n < 8; n++) {
38503 for (size_t k = 1; k <= 40; k += 9) {
38504 GemmMicrokernelTester()
38505 .mr(3)
38506 .nr(4)
38507 .kr(2)
38508 .sr(1)
38509 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080038510 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038511 .k(k)
38512 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080038513 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038514 }
38515 }
38516 }
38517
38518 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_a) {
38519 for (uint32_t n = 5; n < 8; n++) {
38520 for (size_t k = 1; k <= 40; k += 9) {
38521 GemmMicrokernelTester()
38522 .mr(3)
38523 .nr(4)
38524 .kr(2)
38525 .sr(1)
38526 .m(3)
38527 .n(n)
38528 .k(k)
38529 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080038530 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038531 }
38532 }
38533 }
38534
38535 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
38536 for (uint32_t n = 5; n < 8; n++) {
38537 for (size_t k = 1; k <= 40; k += 9) {
38538 for (uint32_t m = 1; m <= 3; m++) {
38539 GemmMicrokernelTester()
38540 .mr(3)
38541 .nr(4)
38542 .kr(2)
38543 .sr(1)
38544 .m(m)
38545 .n(n)
38546 .k(k)
38547 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038548 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038549 }
38550 }
38551 }
38552 }
38553
38554 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
38555 for (uint32_t n = 8; n <= 12; n += 4) {
38556 for (size_t k = 1; k <= 40; k += 9) {
38557 GemmMicrokernelTester()
38558 .mr(3)
38559 .nr(4)
38560 .kr(2)
38561 .sr(1)
38562 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080038563 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038564 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080038565 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038566 }
38567 }
38568 }
38569
38570 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
38571 for (uint32_t n = 8; n <= 12; n += 4) {
38572 for (size_t k = 1; k <= 40; k += 9) {
38573 GemmMicrokernelTester()
38574 .mr(3)
38575 .nr(4)
38576 .kr(2)
38577 .sr(1)
38578 .m(3)
38579 .n(n)
38580 .k(k)
38581 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080038582 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038583 }
38584 }
38585 }
38586
38587 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_a) {
38588 for (uint32_t n = 8; n <= 12; n += 4) {
38589 for (size_t k = 1; k <= 40; k += 9) {
38590 GemmMicrokernelTester()
38591 .mr(3)
38592 .nr(4)
38593 .kr(2)
38594 .sr(1)
38595 .m(3)
38596 .n(n)
38597 .k(k)
38598 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080038599 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038600 }
38601 }
38602 }
38603
38604 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
38605 for (uint32_t n = 8; n <= 12; n += 4) {
38606 for (size_t k = 1; k <= 40; k += 9) {
38607 for (uint32_t m = 1; m <= 3; m++) {
38608 GemmMicrokernelTester()
38609 .mr(3)
38610 .nr(4)
38611 .kr(2)
38612 .sr(1)
38613 .m(m)
38614 .n(n)
38615 .k(k)
38616 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038617 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038618 }
38619 }
38620 }
38621 }
38622
38623 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
38624 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080038625 for (uint32_t n = 1; n <= 4; n++) {
38626 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038627 GemmMicrokernelTester()
38628 .mr(3)
38629 .nr(4)
38630 .kr(2)
38631 .sr(1)
38632 .m(m)
38633 .n(n)
38634 .k(k)
38635 .cm_stride(7)
38636 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038637 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038638 }
38639 }
38640 }
38641 }
38642
38643 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
38644 GemmMicrokernelTester()
38645 .mr(3)
38646 .nr(4)
38647 .kr(2)
38648 .sr(1)
38649 .m(3)
38650 .n(4)
38651 .k(8)
38652 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080038653 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038654 }
38655
38656 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
38657 GemmMicrokernelTester()
38658 .mr(3)
38659 .nr(4)
38660 .kr(2)
38661 .sr(1)
38662 .m(3)
38663 .n(4)
38664 .k(8)
38665 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080038666 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038667 }
38668
38669 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
38670 GemmMicrokernelTester()
38671 .mr(3)
38672 .nr(4)
38673 .kr(2)
38674 .sr(1)
38675 .m(3)
38676 .n(4)
38677 .k(8)
38678 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080038679 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038680 }
Marat Dukhan4c617792021-12-21 15:47:58 -080038681#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038682
38683
Marat Dukhan4c617792021-12-21 15:47:58 -080038684#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038685 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
38686 GemmMicrokernelTester()
38687 .mr(1)
38688 .nr(4)
38689 .kr(2)
38690 .sr(1)
38691 .m(1)
38692 .n(4)
38693 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080038694 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038695 }
38696
38697 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
38698 GemmMicrokernelTester()
38699 .mr(1)
38700 .nr(4)
38701 .kr(2)
38702 .sr(1)
38703 .m(1)
38704 .n(4)
38705 .k(8)
38706 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080038707 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038708 }
38709
38710 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
38711 GemmMicrokernelTester()
38712 .mr(1)
38713 .nr(4)
38714 .kr(2)
38715 .sr(1)
38716 .m(1)
38717 .n(4)
38718 .k(8)
38719 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080038720 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038721 }
38722
38723 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080038724 for (uint32_t n = 1; n <= 4; n++) {
38725 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038726 GemmMicrokernelTester()
38727 .mr(1)
38728 .nr(4)
38729 .kr(2)
38730 .sr(1)
38731 .m(m)
38732 .n(n)
38733 .k(8)
38734 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038735 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038736 }
38737 }
38738 }
38739
38740 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
38741 for (uint32_t m = 1; m <= 1; m++) {
38742 GemmMicrokernelTester()
38743 .mr(1)
38744 .nr(4)
38745 .kr(2)
38746 .sr(1)
38747 .m(m)
38748 .n(4)
38749 .k(8)
38750 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038751 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038752 }
38753 }
38754
38755 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
38756 for (uint32_t n = 1; n <= 4; n++) {
38757 GemmMicrokernelTester()
38758 .mr(1)
38759 .nr(4)
38760 .kr(2)
38761 .sr(1)
38762 .m(1)
38763 .n(n)
38764 .k(8)
38765 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038766 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038767 }
38768 }
38769
38770 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
38771 for (size_t k = 1; k < 8; k++) {
38772 GemmMicrokernelTester()
38773 .mr(1)
38774 .nr(4)
38775 .kr(2)
38776 .sr(1)
38777 .m(1)
38778 .n(4)
38779 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080038780 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038781 }
38782 }
38783
38784 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
38785 for (size_t k = 1; k < 8; k++) {
38786 GemmMicrokernelTester()
38787 .mr(1)
38788 .nr(4)
38789 .kr(2)
38790 .sr(1)
38791 .m(1)
38792 .n(4)
38793 .k(k)
38794 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080038795 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038796 }
38797 }
38798
38799 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
38800 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080038801 for (uint32_t n = 1; n <= 4; n++) {
38802 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038803 GemmMicrokernelTester()
38804 .mr(1)
38805 .nr(4)
38806 .kr(2)
38807 .sr(1)
38808 .m(m)
38809 .n(n)
38810 .k(k)
38811 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038812 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038813 }
38814 }
38815 }
38816 }
38817
38818 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
38819 for (size_t k = 9; k < 16; k++) {
38820 GemmMicrokernelTester()
38821 .mr(1)
38822 .nr(4)
38823 .kr(2)
38824 .sr(1)
38825 .m(1)
38826 .n(4)
38827 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080038828 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038829 }
38830 }
38831
38832 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
38833 for (size_t k = 9; k < 16; k++) {
38834 GemmMicrokernelTester()
38835 .mr(1)
38836 .nr(4)
38837 .kr(2)
38838 .sr(1)
38839 .m(1)
38840 .n(4)
38841 .k(k)
38842 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080038843 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038844 }
38845 }
38846
38847 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
38848 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080038849 for (uint32_t n = 1; n <= 4; n++) {
38850 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038851 GemmMicrokernelTester()
38852 .mr(1)
38853 .nr(4)
38854 .kr(2)
38855 .sr(1)
38856 .m(m)
38857 .n(n)
38858 .k(k)
38859 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038860 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038861 }
38862 }
38863 }
38864 }
38865
38866 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
38867 for (size_t k = 16; k <= 80; k += 8) {
38868 GemmMicrokernelTester()
38869 .mr(1)
38870 .nr(4)
38871 .kr(2)
38872 .sr(1)
38873 .m(1)
38874 .n(4)
38875 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080038876 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038877 }
38878 }
38879
38880 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
38881 for (size_t k = 16; k <= 80; k += 8) {
38882 GemmMicrokernelTester()
38883 .mr(1)
38884 .nr(4)
38885 .kr(2)
38886 .sr(1)
38887 .m(1)
38888 .n(4)
38889 .k(k)
38890 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080038891 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038892 }
38893 }
38894
38895 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
38896 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080038897 for (uint32_t n = 1; n <= 4; n++) {
38898 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038899 GemmMicrokernelTester()
38900 .mr(1)
38901 .nr(4)
38902 .kr(2)
38903 .sr(1)
38904 .m(m)
38905 .n(n)
38906 .k(k)
38907 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038908 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038909 }
38910 }
38911 }
38912 }
38913
38914 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
38915 for (uint32_t n = 5; n < 8; n++) {
38916 for (size_t k = 1; k <= 40; k += 9) {
38917 GemmMicrokernelTester()
38918 .mr(1)
38919 .nr(4)
38920 .kr(2)
38921 .sr(1)
38922 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080038923 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038924 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080038925 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038926 }
38927 }
38928 }
38929
38930 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
38931 for (uint32_t n = 5; n < 8; n++) {
38932 for (size_t k = 1; k <= 40; k += 9) {
38933 GemmMicrokernelTester()
38934 .mr(1)
38935 .nr(4)
38936 .kr(2)
38937 .sr(1)
38938 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080038939 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038940 .k(k)
38941 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080038942 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038943 }
38944 }
38945 }
38946
38947 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
38948 for (uint32_t n = 5; n < 8; n++) {
38949 for (size_t k = 1; k <= 40; k += 9) {
38950 GemmMicrokernelTester()
38951 .mr(1)
38952 .nr(4)
38953 .kr(2)
38954 .sr(1)
38955 .m(1)
38956 .n(n)
38957 .k(k)
38958 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080038959 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038960 }
38961 }
38962 }
38963
38964 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
38965 for (uint32_t n = 5; n < 8; n++) {
38966 for (size_t k = 1; k <= 40; k += 9) {
38967 for (uint32_t m = 1; m <= 1; m++) {
38968 GemmMicrokernelTester()
38969 .mr(1)
38970 .nr(4)
38971 .kr(2)
38972 .sr(1)
38973 .m(m)
38974 .n(n)
38975 .k(k)
38976 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080038977 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038978 }
38979 }
38980 }
38981 }
38982
38983 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
38984 for (uint32_t n = 8; n <= 12; n += 4) {
38985 for (size_t k = 1; k <= 40; k += 9) {
38986 GemmMicrokernelTester()
38987 .mr(1)
38988 .nr(4)
38989 .kr(2)
38990 .sr(1)
38991 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080038992 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038993 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080038994 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070038995 }
38996 }
38997 }
38998
38999 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
39000 for (uint32_t n = 8; n <= 12; n += 4) {
39001 for (size_t k = 1; k <= 40; k += 9) {
39002 GemmMicrokernelTester()
39003 .mr(1)
39004 .nr(4)
39005 .kr(2)
39006 .sr(1)
39007 .m(1)
39008 .n(n)
39009 .k(k)
39010 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080039011 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039012 }
39013 }
39014 }
39015
39016 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
39017 for (uint32_t n = 8; n <= 12; n += 4) {
39018 for (size_t k = 1; k <= 40; k += 9) {
39019 GemmMicrokernelTester()
39020 .mr(1)
39021 .nr(4)
39022 .kr(2)
39023 .sr(1)
39024 .m(1)
39025 .n(n)
39026 .k(k)
39027 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080039028 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039029 }
39030 }
39031 }
39032
39033 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
39034 for (uint32_t n = 8; n <= 12; n += 4) {
39035 for (size_t k = 1; k <= 40; k += 9) {
39036 for (uint32_t m = 1; m <= 1; m++) {
39037 GemmMicrokernelTester()
39038 .mr(1)
39039 .nr(4)
39040 .kr(2)
39041 .sr(1)
39042 .m(m)
39043 .n(n)
39044 .k(k)
39045 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039046 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039047 }
39048 }
39049 }
39050 }
39051
39052 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
39053 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080039054 for (uint32_t n = 1; n <= 4; n++) {
39055 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039056 GemmMicrokernelTester()
39057 .mr(1)
39058 .nr(4)
39059 .kr(2)
39060 .sr(1)
39061 .m(m)
39062 .n(n)
39063 .k(k)
39064 .cm_stride(7)
39065 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039066 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039067 }
39068 }
39069 }
39070 }
39071
39072 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
39073 GemmMicrokernelTester()
39074 .mr(1)
39075 .nr(4)
39076 .kr(2)
39077 .sr(1)
39078 .m(1)
39079 .n(4)
39080 .k(8)
39081 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080039082 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039083 }
39084
39085 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
39086 GemmMicrokernelTester()
39087 .mr(1)
39088 .nr(4)
39089 .kr(2)
39090 .sr(1)
39091 .m(1)
39092 .n(4)
39093 .k(8)
39094 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080039095 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039096 }
39097
39098 TEST(QS8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
39099 GemmMicrokernelTester()
39100 .mr(1)
39101 .nr(4)
39102 .kr(2)
39103 .sr(1)
39104 .m(1)
39105 .n(4)
39106 .k(8)
39107 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080039108 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039109 }
Marat Dukhan4c617792021-12-21 15:47:58 -080039110#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039111
39112
Marat Dukhan4c617792021-12-21 15:47:58 -080039113#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039114 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
39115 GemmMicrokernelTester()
39116 .mr(2)
39117 .nr(4)
39118 .kr(2)
39119 .sr(1)
39120 .m(2)
39121 .n(4)
39122 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080039123 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039124 }
39125
39126 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
39127 GemmMicrokernelTester()
39128 .mr(2)
39129 .nr(4)
39130 .kr(2)
39131 .sr(1)
39132 .m(2)
39133 .n(4)
39134 .k(8)
39135 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080039136 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039137 }
39138
39139 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
39140 GemmMicrokernelTester()
39141 .mr(2)
39142 .nr(4)
39143 .kr(2)
39144 .sr(1)
39145 .m(2)
39146 .n(4)
39147 .k(8)
39148 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080039149 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039150 }
39151
39152 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080039153 for (uint32_t n = 1; n <= 4; n++) {
39154 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039155 GemmMicrokernelTester()
39156 .mr(2)
39157 .nr(4)
39158 .kr(2)
39159 .sr(1)
39160 .m(m)
39161 .n(n)
39162 .k(8)
39163 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039164 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039165 }
39166 }
39167 }
39168
39169 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
39170 for (uint32_t m = 1; m <= 2; m++) {
39171 GemmMicrokernelTester()
39172 .mr(2)
39173 .nr(4)
39174 .kr(2)
39175 .sr(1)
39176 .m(m)
39177 .n(4)
39178 .k(8)
39179 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039180 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039181 }
39182 }
39183
39184 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
39185 for (uint32_t n = 1; n <= 4; n++) {
39186 GemmMicrokernelTester()
39187 .mr(2)
39188 .nr(4)
39189 .kr(2)
39190 .sr(1)
39191 .m(2)
39192 .n(n)
39193 .k(8)
39194 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039195 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039196 }
39197 }
39198
39199 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
39200 for (size_t k = 1; k < 8; k++) {
39201 GemmMicrokernelTester()
39202 .mr(2)
39203 .nr(4)
39204 .kr(2)
39205 .sr(1)
39206 .m(2)
39207 .n(4)
39208 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080039209 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039210 }
39211 }
39212
39213 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
39214 for (size_t k = 1; k < 8; k++) {
39215 GemmMicrokernelTester()
39216 .mr(2)
39217 .nr(4)
39218 .kr(2)
39219 .sr(1)
39220 .m(2)
39221 .n(4)
39222 .k(k)
39223 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080039224 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039225 }
39226 }
39227
39228 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
39229 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080039230 for (uint32_t n = 1; n <= 4; n++) {
39231 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039232 GemmMicrokernelTester()
39233 .mr(2)
39234 .nr(4)
39235 .kr(2)
39236 .sr(1)
39237 .m(m)
39238 .n(n)
39239 .k(k)
39240 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039241 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039242 }
39243 }
39244 }
39245 }
39246
39247 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
39248 for (size_t k = 9; k < 16; k++) {
39249 GemmMicrokernelTester()
39250 .mr(2)
39251 .nr(4)
39252 .kr(2)
39253 .sr(1)
39254 .m(2)
39255 .n(4)
39256 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080039257 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039258 }
39259 }
39260
39261 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
39262 for (size_t k = 9; k < 16; k++) {
39263 GemmMicrokernelTester()
39264 .mr(2)
39265 .nr(4)
39266 .kr(2)
39267 .sr(1)
39268 .m(2)
39269 .n(4)
39270 .k(k)
39271 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080039272 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039273 }
39274 }
39275
39276 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
39277 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080039278 for (uint32_t n = 1; n <= 4; n++) {
39279 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039280 GemmMicrokernelTester()
39281 .mr(2)
39282 .nr(4)
39283 .kr(2)
39284 .sr(1)
39285 .m(m)
39286 .n(n)
39287 .k(k)
39288 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039289 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039290 }
39291 }
39292 }
39293 }
39294
39295 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
39296 for (size_t k = 16; k <= 80; k += 8) {
39297 GemmMicrokernelTester()
39298 .mr(2)
39299 .nr(4)
39300 .kr(2)
39301 .sr(1)
39302 .m(2)
39303 .n(4)
39304 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080039305 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039306 }
39307 }
39308
39309 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
39310 for (size_t k = 16; k <= 80; k += 8) {
39311 GemmMicrokernelTester()
39312 .mr(2)
39313 .nr(4)
39314 .kr(2)
39315 .sr(1)
39316 .m(2)
39317 .n(4)
39318 .k(k)
39319 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080039320 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039321 }
39322 }
39323
39324 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
39325 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080039326 for (uint32_t n = 1; n <= 4; n++) {
39327 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039328 GemmMicrokernelTester()
39329 .mr(2)
39330 .nr(4)
39331 .kr(2)
39332 .sr(1)
39333 .m(m)
39334 .n(n)
39335 .k(k)
39336 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039337 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039338 }
39339 }
39340 }
39341 }
39342
39343 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
39344 for (uint32_t n = 5; n < 8; n++) {
39345 for (size_t k = 1; k <= 40; k += 9) {
39346 GemmMicrokernelTester()
39347 .mr(2)
39348 .nr(4)
39349 .kr(2)
39350 .sr(1)
39351 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080039352 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039353 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080039354 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039355 }
39356 }
39357 }
39358
39359 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
39360 for (uint32_t n = 5; n < 8; n++) {
39361 for (size_t k = 1; k <= 40; k += 9) {
39362 GemmMicrokernelTester()
39363 .mr(2)
39364 .nr(4)
39365 .kr(2)
39366 .sr(1)
39367 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080039368 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039369 .k(k)
39370 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080039371 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039372 }
39373 }
39374 }
39375
39376 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
39377 for (uint32_t n = 5; n < 8; n++) {
39378 for (size_t k = 1; k <= 40; k += 9) {
39379 GemmMicrokernelTester()
39380 .mr(2)
39381 .nr(4)
39382 .kr(2)
39383 .sr(1)
39384 .m(2)
39385 .n(n)
39386 .k(k)
39387 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080039388 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039389 }
39390 }
39391 }
39392
39393 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
39394 for (uint32_t n = 5; n < 8; n++) {
39395 for (size_t k = 1; k <= 40; k += 9) {
39396 for (uint32_t m = 1; m <= 2; m++) {
39397 GemmMicrokernelTester()
39398 .mr(2)
39399 .nr(4)
39400 .kr(2)
39401 .sr(1)
39402 .m(m)
39403 .n(n)
39404 .k(k)
39405 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039406 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039407 }
39408 }
39409 }
39410 }
39411
39412 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
39413 for (uint32_t n = 8; n <= 12; n += 4) {
39414 for (size_t k = 1; k <= 40; k += 9) {
39415 GemmMicrokernelTester()
39416 .mr(2)
39417 .nr(4)
39418 .kr(2)
39419 .sr(1)
39420 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080039421 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039422 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080039423 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039424 }
39425 }
39426 }
39427
39428 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
39429 for (uint32_t n = 8; n <= 12; n += 4) {
39430 for (size_t k = 1; k <= 40; k += 9) {
39431 GemmMicrokernelTester()
39432 .mr(2)
39433 .nr(4)
39434 .kr(2)
39435 .sr(1)
39436 .m(2)
39437 .n(n)
39438 .k(k)
39439 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080039440 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039441 }
39442 }
39443 }
39444
39445 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
39446 for (uint32_t n = 8; n <= 12; n += 4) {
39447 for (size_t k = 1; k <= 40; k += 9) {
39448 GemmMicrokernelTester()
39449 .mr(2)
39450 .nr(4)
39451 .kr(2)
39452 .sr(1)
39453 .m(2)
39454 .n(n)
39455 .k(k)
39456 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080039457 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039458 }
39459 }
39460 }
39461
39462 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
39463 for (uint32_t n = 8; n <= 12; n += 4) {
39464 for (size_t k = 1; k <= 40; k += 9) {
39465 for (uint32_t m = 1; m <= 2; m++) {
39466 GemmMicrokernelTester()
39467 .mr(2)
39468 .nr(4)
39469 .kr(2)
39470 .sr(1)
39471 .m(m)
39472 .n(n)
39473 .k(k)
39474 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039475 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039476 }
39477 }
39478 }
39479 }
39480
39481 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
39482 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080039483 for (uint32_t n = 1; n <= 4; n++) {
39484 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039485 GemmMicrokernelTester()
39486 .mr(2)
39487 .nr(4)
39488 .kr(2)
39489 .sr(1)
39490 .m(m)
39491 .n(n)
39492 .k(k)
39493 .cm_stride(7)
39494 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039495 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039496 }
39497 }
39498 }
39499 }
39500
39501 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
39502 GemmMicrokernelTester()
39503 .mr(2)
39504 .nr(4)
39505 .kr(2)
39506 .sr(1)
39507 .m(2)
39508 .n(4)
39509 .k(8)
39510 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080039511 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039512 }
39513
39514 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
39515 GemmMicrokernelTester()
39516 .mr(2)
39517 .nr(4)
39518 .kr(2)
39519 .sr(1)
39520 .m(2)
39521 .n(4)
39522 .k(8)
39523 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080039524 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039525 }
39526
39527 TEST(QS8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
39528 GemmMicrokernelTester()
39529 .mr(2)
39530 .nr(4)
39531 .kr(2)
39532 .sr(1)
39533 .m(2)
39534 .n(4)
39535 .k(8)
39536 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080039537 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039538 }
Marat Dukhan4c617792021-12-21 15:47:58 -080039539#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039540
39541
Marat Dukhan4c617792021-12-21 15:47:58 -080039542#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039543 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
39544 GemmMicrokernelTester()
39545 .mr(3)
39546 .nr(4)
39547 .kr(2)
39548 .sr(1)
39549 .m(3)
39550 .n(4)
39551 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080039552 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039553 }
39554
39555 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
39556 GemmMicrokernelTester()
39557 .mr(3)
39558 .nr(4)
39559 .kr(2)
39560 .sr(1)
39561 .m(3)
39562 .n(4)
39563 .k(8)
39564 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080039565 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039566 }
39567
39568 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
39569 GemmMicrokernelTester()
39570 .mr(3)
39571 .nr(4)
39572 .kr(2)
39573 .sr(1)
39574 .m(3)
39575 .n(4)
39576 .k(8)
39577 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080039578 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039579 }
39580
39581 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080039582 for (uint32_t n = 1; n <= 4; n++) {
39583 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039584 GemmMicrokernelTester()
39585 .mr(3)
39586 .nr(4)
39587 .kr(2)
39588 .sr(1)
39589 .m(m)
39590 .n(n)
39591 .k(8)
39592 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039593 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039594 }
39595 }
39596 }
39597
39598 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
39599 for (uint32_t m = 1; m <= 3; m++) {
39600 GemmMicrokernelTester()
39601 .mr(3)
39602 .nr(4)
39603 .kr(2)
39604 .sr(1)
39605 .m(m)
39606 .n(4)
39607 .k(8)
39608 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039609 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039610 }
39611 }
39612
39613 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
39614 for (uint32_t n = 1; n <= 4; n++) {
39615 GemmMicrokernelTester()
39616 .mr(3)
39617 .nr(4)
39618 .kr(2)
39619 .sr(1)
39620 .m(3)
39621 .n(n)
39622 .k(8)
39623 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039624 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039625 }
39626 }
39627
39628 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
39629 for (size_t k = 1; k < 8; k++) {
39630 GemmMicrokernelTester()
39631 .mr(3)
39632 .nr(4)
39633 .kr(2)
39634 .sr(1)
39635 .m(3)
39636 .n(4)
39637 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080039638 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039639 }
39640 }
39641
39642 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
39643 for (size_t k = 1; k < 8; k++) {
39644 GemmMicrokernelTester()
39645 .mr(3)
39646 .nr(4)
39647 .kr(2)
39648 .sr(1)
39649 .m(3)
39650 .n(4)
39651 .k(k)
39652 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080039653 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039654 }
39655 }
39656
39657 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
39658 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080039659 for (uint32_t n = 1; n <= 4; n++) {
39660 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039661 GemmMicrokernelTester()
39662 .mr(3)
39663 .nr(4)
39664 .kr(2)
39665 .sr(1)
39666 .m(m)
39667 .n(n)
39668 .k(k)
39669 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039670 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039671 }
39672 }
39673 }
39674 }
39675
39676 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
39677 for (size_t k = 9; k < 16; k++) {
39678 GemmMicrokernelTester()
39679 .mr(3)
39680 .nr(4)
39681 .kr(2)
39682 .sr(1)
39683 .m(3)
39684 .n(4)
39685 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080039686 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039687 }
39688 }
39689
39690 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
39691 for (size_t k = 9; k < 16; k++) {
39692 GemmMicrokernelTester()
39693 .mr(3)
39694 .nr(4)
39695 .kr(2)
39696 .sr(1)
39697 .m(3)
39698 .n(4)
39699 .k(k)
39700 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080039701 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039702 }
39703 }
39704
39705 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
39706 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080039707 for (uint32_t n = 1; n <= 4; n++) {
39708 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039709 GemmMicrokernelTester()
39710 .mr(3)
39711 .nr(4)
39712 .kr(2)
39713 .sr(1)
39714 .m(m)
39715 .n(n)
39716 .k(k)
39717 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039718 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039719 }
39720 }
39721 }
39722 }
39723
39724 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
39725 for (size_t k = 16; k <= 80; k += 8) {
39726 GemmMicrokernelTester()
39727 .mr(3)
39728 .nr(4)
39729 .kr(2)
39730 .sr(1)
39731 .m(3)
39732 .n(4)
39733 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080039734 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039735 }
39736 }
39737
39738 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
39739 for (size_t k = 16; k <= 80; k += 8) {
39740 GemmMicrokernelTester()
39741 .mr(3)
39742 .nr(4)
39743 .kr(2)
39744 .sr(1)
39745 .m(3)
39746 .n(4)
39747 .k(k)
39748 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080039749 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039750 }
39751 }
39752
39753 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
39754 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080039755 for (uint32_t n = 1; n <= 4; n++) {
39756 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039757 GemmMicrokernelTester()
39758 .mr(3)
39759 .nr(4)
39760 .kr(2)
39761 .sr(1)
39762 .m(m)
39763 .n(n)
39764 .k(k)
39765 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039766 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039767 }
39768 }
39769 }
39770 }
39771
39772 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
39773 for (uint32_t n = 5; n < 8; n++) {
39774 for (size_t k = 1; k <= 40; k += 9) {
39775 GemmMicrokernelTester()
39776 .mr(3)
39777 .nr(4)
39778 .kr(2)
39779 .sr(1)
39780 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080039781 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039782 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080039783 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039784 }
39785 }
39786 }
39787
39788 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
39789 for (uint32_t n = 5; n < 8; n++) {
39790 for (size_t k = 1; k <= 40; k += 9) {
39791 GemmMicrokernelTester()
39792 .mr(3)
39793 .nr(4)
39794 .kr(2)
39795 .sr(1)
39796 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080039797 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039798 .k(k)
39799 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080039800 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039801 }
39802 }
39803 }
39804
39805 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
39806 for (uint32_t n = 5; n < 8; n++) {
39807 for (size_t k = 1; k <= 40; k += 9) {
39808 GemmMicrokernelTester()
39809 .mr(3)
39810 .nr(4)
39811 .kr(2)
39812 .sr(1)
39813 .m(3)
39814 .n(n)
39815 .k(k)
39816 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080039817 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039818 }
39819 }
39820 }
39821
39822 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
39823 for (uint32_t n = 5; n < 8; n++) {
39824 for (size_t k = 1; k <= 40; k += 9) {
39825 for (uint32_t m = 1; m <= 3; m++) {
39826 GemmMicrokernelTester()
39827 .mr(3)
39828 .nr(4)
39829 .kr(2)
39830 .sr(1)
39831 .m(m)
39832 .n(n)
39833 .k(k)
39834 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039835 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039836 }
39837 }
39838 }
39839 }
39840
39841 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
39842 for (uint32_t n = 8; n <= 12; n += 4) {
39843 for (size_t k = 1; k <= 40; k += 9) {
39844 GemmMicrokernelTester()
39845 .mr(3)
39846 .nr(4)
39847 .kr(2)
39848 .sr(1)
39849 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080039850 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039851 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080039852 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039853 }
39854 }
39855 }
39856
39857 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
39858 for (uint32_t n = 8; n <= 12; n += 4) {
39859 for (size_t k = 1; k <= 40; k += 9) {
39860 GemmMicrokernelTester()
39861 .mr(3)
39862 .nr(4)
39863 .kr(2)
39864 .sr(1)
39865 .m(3)
39866 .n(n)
39867 .k(k)
39868 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080039869 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039870 }
39871 }
39872 }
39873
39874 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
39875 for (uint32_t n = 8; n <= 12; n += 4) {
39876 for (size_t k = 1; k <= 40; k += 9) {
39877 GemmMicrokernelTester()
39878 .mr(3)
39879 .nr(4)
39880 .kr(2)
39881 .sr(1)
39882 .m(3)
39883 .n(n)
39884 .k(k)
39885 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080039886 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039887 }
39888 }
39889 }
39890
39891 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
39892 for (uint32_t n = 8; n <= 12; n += 4) {
39893 for (size_t k = 1; k <= 40; k += 9) {
39894 for (uint32_t m = 1; m <= 3; m++) {
39895 GemmMicrokernelTester()
39896 .mr(3)
39897 .nr(4)
39898 .kr(2)
39899 .sr(1)
39900 .m(m)
39901 .n(n)
39902 .k(k)
39903 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039904 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039905 }
39906 }
39907 }
39908 }
39909
39910 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
39911 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080039912 for (uint32_t n = 1; n <= 4; n++) {
39913 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039914 GemmMicrokernelTester()
39915 .mr(3)
39916 .nr(4)
39917 .kr(2)
39918 .sr(1)
39919 .m(m)
39920 .n(n)
39921 .k(k)
39922 .cm_stride(7)
39923 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080039924 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039925 }
39926 }
39927 }
39928 }
39929
39930 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
39931 GemmMicrokernelTester()
39932 .mr(3)
39933 .nr(4)
39934 .kr(2)
39935 .sr(1)
39936 .m(3)
39937 .n(4)
39938 .k(8)
39939 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080039940 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039941 }
39942
39943 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
39944 GemmMicrokernelTester()
39945 .mr(3)
39946 .nr(4)
39947 .kr(2)
39948 .sr(1)
39949 .m(3)
39950 .n(4)
39951 .k(8)
39952 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080039953 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039954 }
39955
39956 TEST(QS8_GEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
39957 GemmMicrokernelTester()
39958 .mr(3)
39959 .nr(4)
39960 .kr(2)
39961 .sr(1)
39962 .m(3)
39963 .n(4)
39964 .k(8)
39965 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080039966 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039967 }
Marat Dukhan4c617792021-12-21 15:47:58 -080039968#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039969
39970
Marat Dukhan4c617792021-12-21 15:47:58 -080039971#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039972 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2, k_eq_8) {
39973 GemmMicrokernelTester()
39974 .extended_weights(true)
39975 .mr(1)
39976 .nr(4)
39977 .kr(2)
39978 .sr(1)
39979 .m(1)
39980 .n(4)
39981 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080039982 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039983 }
39984
39985 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2, strided_cn) {
39986 GemmMicrokernelTester()
39987 .extended_weights(true)
39988 .mr(1)
39989 .nr(4)
39990 .kr(2)
39991 .sr(1)
39992 .m(1)
39993 .n(4)
39994 .k(8)
39995 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080039996 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070039997 }
39998
39999 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2, k_eq_8_strided_a) {
40000 GemmMicrokernelTester()
40001 .extended_weights(true)
40002 .mr(1)
40003 .nr(4)
40004 .kr(2)
40005 .sr(1)
40006 .m(1)
40007 .n(4)
40008 .k(8)
40009 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080040010 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040011 }
40012
40013 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080040014 for (uint32_t n = 1; n <= 4; n++) {
40015 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040016 GemmMicrokernelTester()
40017 .extended_weights(true)
40018 .mr(1)
40019 .nr(4)
40020 .kr(2)
40021 .sr(1)
40022 .m(m)
40023 .n(n)
40024 .k(8)
40025 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040026 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040027 }
40028 }
40029 }
40030
40031 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2, k_eq_8_subtile_m) {
40032 for (uint32_t m = 1; m <= 1; m++) {
40033 GemmMicrokernelTester()
40034 .extended_weights(true)
40035 .mr(1)
40036 .nr(4)
40037 .kr(2)
40038 .sr(1)
40039 .m(m)
40040 .n(4)
40041 .k(8)
40042 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040043 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040044 }
40045 }
40046
40047 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2, k_eq_8_subtile_n) {
40048 for (uint32_t n = 1; n <= 4; n++) {
40049 GemmMicrokernelTester()
40050 .extended_weights(true)
40051 .mr(1)
40052 .nr(4)
40053 .kr(2)
40054 .sr(1)
40055 .m(1)
40056 .n(n)
40057 .k(8)
40058 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040059 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040060 }
40061 }
40062
40063 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2, k_lt_8) {
40064 for (size_t k = 1; k < 8; k++) {
40065 GemmMicrokernelTester()
40066 .extended_weights(true)
40067 .mr(1)
40068 .nr(4)
40069 .kr(2)
40070 .sr(1)
40071 .m(1)
40072 .n(4)
40073 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080040074 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040075 }
40076 }
40077
40078 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2, k_lt_8_strided_a) {
40079 for (size_t k = 1; k < 8; k++) {
40080 GemmMicrokernelTester()
40081 .extended_weights(true)
40082 .mr(1)
40083 .nr(4)
40084 .kr(2)
40085 .sr(1)
40086 .m(1)
40087 .n(4)
40088 .k(k)
40089 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080040090 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040091 }
40092 }
40093
40094 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2, k_lt_8_subtile) {
40095 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080040096 for (uint32_t n = 1; n <= 4; n++) {
40097 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040098 GemmMicrokernelTester()
40099 .extended_weights(true)
40100 .mr(1)
40101 .nr(4)
40102 .kr(2)
40103 .sr(1)
40104 .m(m)
40105 .n(n)
40106 .k(k)
40107 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040108 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040109 }
40110 }
40111 }
40112 }
40113
40114 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2, k_gt_8) {
40115 for (size_t k = 9; k < 16; k++) {
40116 GemmMicrokernelTester()
40117 .extended_weights(true)
40118 .mr(1)
40119 .nr(4)
40120 .kr(2)
40121 .sr(1)
40122 .m(1)
40123 .n(4)
40124 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080040125 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040126 }
40127 }
40128
40129 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2, k_gt_8_strided_a) {
40130 for (size_t k = 9; k < 16; k++) {
40131 GemmMicrokernelTester()
40132 .extended_weights(true)
40133 .mr(1)
40134 .nr(4)
40135 .kr(2)
40136 .sr(1)
40137 .m(1)
40138 .n(4)
40139 .k(k)
40140 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080040141 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040142 }
40143 }
40144
40145 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2, k_gt_8_subtile) {
40146 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080040147 for (uint32_t n = 1; n <= 4; n++) {
40148 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040149 GemmMicrokernelTester()
40150 .extended_weights(true)
40151 .mr(1)
40152 .nr(4)
40153 .kr(2)
40154 .sr(1)
40155 .m(m)
40156 .n(n)
40157 .k(k)
40158 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040159 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040160 }
40161 }
40162 }
40163 }
40164
40165 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2, k_div_8) {
40166 for (size_t k = 16; k <= 80; k += 8) {
40167 GemmMicrokernelTester()
40168 .extended_weights(true)
40169 .mr(1)
40170 .nr(4)
40171 .kr(2)
40172 .sr(1)
40173 .m(1)
40174 .n(4)
40175 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080040176 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040177 }
40178 }
40179
40180 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2, k_div_8_strided_a) {
40181 for (size_t k = 16; k <= 80; k += 8) {
40182 GemmMicrokernelTester()
40183 .extended_weights(true)
40184 .mr(1)
40185 .nr(4)
40186 .kr(2)
40187 .sr(1)
40188 .m(1)
40189 .n(4)
40190 .k(k)
40191 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080040192 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040193 }
40194 }
40195
40196 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2, k_div_8_subtile) {
40197 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080040198 for (uint32_t n = 1; n <= 4; n++) {
40199 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040200 GemmMicrokernelTester()
40201 .extended_weights(true)
40202 .mr(1)
40203 .nr(4)
40204 .kr(2)
40205 .sr(1)
40206 .m(m)
40207 .n(n)
40208 .k(k)
40209 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040210 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040211 }
40212 }
40213 }
40214 }
40215
40216 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2, n_gt_4) {
40217 for (uint32_t n = 5; n < 8; n++) {
40218 for (size_t k = 1; k <= 40; k += 9) {
40219 GemmMicrokernelTester()
40220 .extended_weights(true)
40221 .mr(1)
40222 .nr(4)
40223 .kr(2)
40224 .sr(1)
40225 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080040226 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040227 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080040228 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040229 }
40230 }
40231 }
40232
40233 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2, n_gt_4_strided_cn) {
40234 for (uint32_t n = 5; n < 8; n++) {
40235 for (size_t k = 1; k <= 40; k += 9) {
40236 GemmMicrokernelTester()
40237 .extended_weights(true)
40238 .mr(1)
40239 .nr(4)
40240 .kr(2)
40241 .sr(1)
40242 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080040243 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040244 .k(k)
40245 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080040246 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040247 }
40248 }
40249 }
40250
40251 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2, n_gt_4_strided_a) {
40252 for (uint32_t n = 5; n < 8; n++) {
40253 for (size_t k = 1; k <= 40; k += 9) {
40254 GemmMicrokernelTester()
40255 .extended_weights(true)
40256 .mr(1)
40257 .nr(4)
40258 .kr(2)
40259 .sr(1)
40260 .m(1)
40261 .n(n)
40262 .k(k)
40263 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080040264 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040265 }
40266 }
40267 }
40268
40269 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2, n_gt_4_subtile) {
40270 for (uint32_t n = 5; n < 8; n++) {
40271 for (size_t k = 1; k <= 40; k += 9) {
40272 for (uint32_t m = 1; m <= 1; m++) {
40273 GemmMicrokernelTester()
40274 .extended_weights(true)
40275 .mr(1)
40276 .nr(4)
40277 .kr(2)
40278 .sr(1)
40279 .m(m)
40280 .n(n)
40281 .k(k)
40282 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040283 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040284 }
40285 }
40286 }
40287 }
40288
40289 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2, n_div_4) {
40290 for (uint32_t n = 8; n <= 12; n += 4) {
40291 for (size_t k = 1; k <= 40; k += 9) {
40292 GemmMicrokernelTester()
40293 .extended_weights(true)
40294 .mr(1)
40295 .nr(4)
40296 .kr(2)
40297 .sr(1)
40298 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080040299 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040300 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080040301 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040302 }
40303 }
40304 }
40305
40306 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2, n_div_4_strided_cn) {
40307 for (uint32_t n = 8; n <= 12; n += 4) {
40308 for (size_t k = 1; k <= 40; k += 9) {
40309 GemmMicrokernelTester()
40310 .extended_weights(true)
40311 .mr(1)
40312 .nr(4)
40313 .kr(2)
40314 .sr(1)
40315 .m(1)
40316 .n(n)
40317 .k(k)
40318 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080040319 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040320 }
40321 }
40322 }
40323
40324 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2, n_div_4_strided_a) {
40325 for (uint32_t n = 8; n <= 12; n += 4) {
40326 for (size_t k = 1; k <= 40; k += 9) {
40327 GemmMicrokernelTester()
40328 .extended_weights(true)
40329 .mr(1)
40330 .nr(4)
40331 .kr(2)
40332 .sr(1)
40333 .m(1)
40334 .n(n)
40335 .k(k)
40336 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080040337 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040338 }
40339 }
40340 }
40341
40342 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2, n_div_4_subtile) {
40343 for (uint32_t n = 8; n <= 12; n += 4) {
40344 for (size_t k = 1; k <= 40; k += 9) {
40345 for (uint32_t m = 1; m <= 1; m++) {
40346 GemmMicrokernelTester()
40347 .extended_weights(true)
40348 .mr(1)
40349 .nr(4)
40350 .kr(2)
40351 .sr(1)
40352 .m(m)
40353 .n(n)
40354 .k(k)
40355 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040356 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040357 }
40358 }
40359 }
40360 }
40361
40362 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2, strided_cm_subtile) {
40363 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080040364 for (uint32_t n = 1; n <= 4; n++) {
40365 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040366 GemmMicrokernelTester()
40367 .extended_weights(true)
40368 .mr(1)
40369 .nr(4)
40370 .kr(2)
40371 .sr(1)
40372 .m(m)
40373 .n(n)
40374 .k(k)
40375 .cm_stride(7)
40376 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040377 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040378 }
40379 }
40380 }
40381 }
40382
40383 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2, strided_cm) {
40384 GemmMicrokernelTester()
40385 .extended_weights(true)
40386 .mr(1)
40387 .nr(4)
40388 .kr(2)
40389 .sr(1)
40390 .m(1)
40391 .n(4)
40392 .k(8)
40393 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080040394 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040395 }
Marat Dukhan4c617792021-12-21 15:47:58 -080040396#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070040397
40398
Marat Dukhan4c617792021-12-21 15:47:58 -080040399#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040400 TEST(QS8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
40401 GemmMicrokernelTester()
40402 .mr(1)
40403 .nr(4)
40404 .kr(2)
40405 .sr(4)
40406 .m(1)
40407 .n(4)
40408 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080040409 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040410 }
40411
40412 TEST(QS8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
40413 GemmMicrokernelTester()
40414 .mr(1)
40415 .nr(4)
40416 .kr(2)
40417 .sr(4)
40418 .m(1)
40419 .n(4)
40420 .k(8)
40421 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080040422 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040423 }
40424
40425 TEST(QS8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
40426 GemmMicrokernelTester()
40427 .mr(1)
40428 .nr(4)
40429 .kr(2)
40430 .sr(4)
40431 .m(1)
40432 .n(4)
40433 .k(8)
40434 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080040435 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040436 }
40437
40438 TEST(QS8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080040439 for (uint32_t n = 1; n <= 4; n++) {
40440 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040441 GemmMicrokernelTester()
40442 .mr(1)
40443 .nr(4)
40444 .kr(2)
40445 .sr(4)
40446 .m(m)
40447 .n(n)
40448 .k(8)
40449 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040450 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040451 }
40452 }
40453 }
40454
40455 TEST(QS8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
40456 for (uint32_t m = 1; m <= 1; m++) {
40457 GemmMicrokernelTester()
40458 .mr(1)
40459 .nr(4)
40460 .kr(2)
40461 .sr(4)
40462 .m(m)
40463 .n(4)
40464 .k(8)
40465 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040466 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040467 }
40468 }
40469
40470 TEST(QS8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
40471 for (uint32_t n = 1; n <= 4; n++) {
40472 GemmMicrokernelTester()
40473 .mr(1)
40474 .nr(4)
40475 .kr(2)
40476 .sr(4)
40477 .m(1)
40478 .n(n)
40479 .k(8)
40480 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040481 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040482 }
40483 }
40484
40485 TEST(QS8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
40486 for (size_t k = 1; k < 8; k++) {
40487 GemmMicrokernelTester()
40488 .mr(1)
40489 .nr(4)
40490 .kr(2)
40491 .sr(4)
40492 .m(1)
40493 .n(4)
40494 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080040495 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040496 }
40497 }
40498
40499 TEST(QS8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
40500 for (size_t k = 1; k < 8; k++) {
40501 GemmMicrokernelTester()
40502 .mr(1)
40503 .nr(4)
40504 .kr(2)
40505 .sr(4)
40506 .m(1)
40507 .n(4)
40508 .k(k)
40509 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080040510 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040511 }
40512 }
40513
40514 TEST(QS8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
40515 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080040516 for (uint32_t n = 1; n <= 4; n++) {
40517 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040518 GemmMicrokernelTester()
40519 .mr(1)
40520 .nr(4)
40521 .kr(2)
40522 .sr(4)
40523 .m(m)
40524 .n(n)
40525 .k(k)
40526 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040527 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040528 }
40529 }
40530 }
40531 }
40532
40533 TEST(QS8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
40534 for (size_t k = 9; k < 16; k++) {
40535 GemmMicrokernelTester()
40536 .mr(1)
40537 .nr(4)
40538 .kr(2)
40539 .sr(4)
40540 .m(1)
40541 .n(4)
40542 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080040543 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040544 }
40545 }
40546
40547 TEST(QS8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
40548 for (size_t k = 9; k < 16; k++) {
40549 GemmMicrokernelTester()
40550 .mr(1)
40551 .nr(4)
40552 .kr(2)
40553 .sr(4)
40554 .m(1)
40555 .n(4)
40556 .k(k)
40557 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080040558 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040559 }
40560 }
40561
40562 TEST(QS8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
40563 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080040564 for (uint32_t n = 1; n <= 4; n++) {
40565 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040566 GemmMicrokernelTester()
40567 .mr(1)
40568 .nr(4)
40569 .kr(2)
40570 .sr(4)
40571 .m(m)
40572 .n(n)
40573 .k(k)
40574 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040575 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040576 }
40577 }
40578 }
40579 }
40580
40581 TEST(QS8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
40582 for (size_t k = 16; k <= 80; k += 8) {
40583 GemmMicrokernelTester()
40584 .mr(1)
40585 .nr(4)
40586 .kr(2)
40587 .sr(4)
40588 .m(1)
40589 .n(4)
40590 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080040591 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040592 }
40593 }
40594
40595 TEST(QS8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
40596 for (size_t k = 16; k <= 80; k += 8) {
40597 GemmMicrokernelTester()
40598 .mr(1)
40599 .nr(4)
40600 .kr(2)
40601 .sr(4)
40602 .m(1)
40603 .n(4)
40604 .k(k)
40605 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080040606 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040607 }
40608 }
40609
40610 TEST(QS8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
40611 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080040612 for (uint32_t n = 1; n <= 4; n++) {
40613 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040614 GemmMicrokernelTester()
40615 .mr(1)
40616 .nr(4)
40617 .kr(2)
40618 .sr(4)
40619 .m(m)
40620 .n(n)
40621 .k(k)
40622 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040623 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040624 }
40625 }
40626 }
40627 }
40628
40629 TEST(QS8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
40630 for (uint32_t n = 5; n < 8; n++) {
40631 for (size_t k = 1; k <= 40; k += 9) {
40632 GemmMicrokernelTester()
40633 .mr(1)
40634 .nr(4)
40635 .kr(2)
40636 .sr(4)
40637 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080040638 .n(n)
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040639 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080040640 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040641 }
40642 }
40643 }
40644
40645 TEST(QS8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
40646 for (uint32_t n = 5; n < 8; n++) {
40647 for (size_t k = 1; k <= 40; k += 9) {
40648 GemmMicrokernelTester()
40649 .mr(1)
40650 .nr(4)
40651 .kr(2)
40652 .sr(4)
40653 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080040654 .n(n)
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040655 .k(k)
40656 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080040657 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040658 }
40659 }
40660 }
40661
40662 TEST(QS8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
40663 for (uint32_t n = 5; n < 8; n++) {
40664 for (size_t k = 1; k <= 40; k += 9) {
40665 GemmMicrokernelTester()
40666 .mr(1)
40667 .nr(4)
40668 .kr(2)
40669 .sr(4)
40670 .m(1)
40671 .n(n)
40672 .k(k)
40673 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080040674 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040675 }
40676 }
40677 }
40678
40679 TEST(QS8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
40680 for (uint32_t n = 5; n < 8; n++) {
40681 for (size_t k = 1; k <= 40; k += 9) {
40682 for (uint32_t m = 1; m <= 1; m++) {
40683 GemmMicrokernelTester()
40684 .mr(1)
40685 .nr(4)
40686 .kr(2)
40687 .sr(4)
40688 .m(m)
40689 .n(n)
40690 .k(k)
40691 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040692 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040693 }
40694 }
40695 }
40696 }
40697
40698 TEST(QS8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
40699 for (uint32_t n = 8; n <= 12; n += 4) {
40700 for (size_t k = 1; k <= 40; k += 9) {
40701 GemmMicrokernelTester()
40702 .mr(1)
40703 .nr(4)
40704 .kr(2)
40705 .sr(4)
40706 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080040707 .n(n)
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040708 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080040709 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040710 }
40711 }
40712 }
40713
40714 TEST(QS8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
40715 for (uint32_t n = 8; n <= 12; n += 4) {
40716 for (size_t k = 1; k <= 40; k += 9) {
40717 GemmMicrokernelTester()
40718 .mr(1)
40719 .nr(4)
40720 .kr(2)
40721 .sr(4)
40722 .m(1)
40723 .n(n)
40724 .k(k)
40725 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080040726 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040727 }
40728 }
40729 }
40730
40731 TEST(QS8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
40732 for (uint32_t n = 8; n <= 12; n += 4) {
40733 for (size_t k = 1; k <= 40; k += 9) {
40734 GemmMicrokernelTester()
40735 .mr(1)
40736 .nr(4)
40737 .kr(2)
40738 .sr(4)
40739 .m(1)
40740 .n(n)
40741 .k(k)
40742 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080040743 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040744 }
40745 }
40746 }
40747
40748 TEST(QS8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
40749 for (uint32_t n = 8; n <= 12; n += 4) {
40750 for (size_t k = 1; k <= 40; k += 9) {
40751 for (uint32_t m = 1; m <= 1; m++) {
40752 GemmMicrokernelTester()
40753 .mr(1)
40754 .nr(4)
40755 .kr(2)
40756 .sr(4)
40757 .m(m)
40758 .n(n)
40759 .k(k)
40760 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040761 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040762 }
40763 }
40764 }
40765 }
40766
40767 TEST(QS8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
40768 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080040769 for (uint32_t n = 1; n <= 4; n++) {
40770 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040771 GemmMicrokernelTester()
40772 .mr(1)
40773 .nr(4)
40774 .kr(2)
40775 .sr(4)
40776 .m(m)
40777 .n(n)
40778 .k(k)
40779 .cm_stride(7)
40780 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040781 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040782 }
40783 }
40784 }
40785 }
40786
40787 TEST(QS8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
40788 GemmMicrokernelTester()
40789 .mr(1)
40790 .nr(4)
40791 .kr(2)
40792 .sr(4)
40793 .m(1)
40794 .n(4)
40795 .k(8)
40796 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080040797 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040798 }
40799
40800 TEST(QS8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
40801 GemmMicrokernelTester()
40802 .mr(1)
40803 .nr(4)
40804 .kr(2)
40805 .sr(4)
40806 .m(1)
40807 .n(4)
40808 .k(8)
40809 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080040810 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040811 }
40812
40813 TEST(QS8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
40814 GemmMicrokernelTester()
40815 .mr(1)
40816 .nr(4)
40817 .kr(2)
40818 .sr(4)
40819 .m(1)
40820 .n(4)
40821 .k(8)
40822 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080040823 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040824 }
Marat Dukhan4c617792021-12-21 15:47:58 -080040825#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040826
40827
Marat Dukhan4c617792021-12-21 15:47:58 -080040828#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040829 TEST(QS8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
40830 GemmMicrokernelTester()
40831 .mr(3)
40832 .nr(4)
40833 .kr(2)
40834 .sr(4)
40835 .m(3)
40836 .n(4)
40837 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080040838 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040839 }
40840
40841 TEST(QS8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
40842 GemmMicrokernelTester()
40843 .mr(3)
40844 .nr(4)
40845 .kr(2)
40846 .sr(4)
40847 .m(3)
40848 .n(4)
40849 .k(8)
40850 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080040851 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040852 }
40853
40854 TEST(QS8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
40855 GemmMicrokernelTester()
40856 .mr(3)
40857 .nr(4)
40858 .kr(2)
40859 .sr(4)
40860 .m(3)
40861 .n(4)
40862 .k(8)
40863 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080040864 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040865 }
40866
40867 TEST(QS8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080040868 for (uint32_t n = 1; n <= 4; n++) {
40869 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040870 GemmMicrokernelTester()
40871 .mr(3)
40872 .nr(4)
40873 .kr(2)
40874 .sr(4)
40875 .m(m)
40876 .n(n)
40877 .k(8)
40878 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040879 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040880 }
40881 }
40882 }
40883
40884 TEST(QS8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
40885 for (uint32_t m = 1; m <= 3; m++) {
40886 GemmMicrokernelTester()
40887 .mr(3)
40888 .nr(4)
40889 .kr(2)
40890 .sr(4)
40891 .m(m)
40892 .n(4)
40893 .k(8)
40894 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040895 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040896 }
40897 }
40898
40899 TEST(QS8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
40900 for (uint32_t n = 1; n <= 4; n++) {
40901 GemmMicrokernelTester()
40902 .mr(3)
40903 .nr(4)
40904 .kr(2)
40905 .sr(4)
40906 .m(3)
40907 .n(n)
40908 .k(8)
40909 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040910 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040911 }
40912 }
40913
40914 TEST(QS8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
40915 for (size_t k = 1; k < 8; k++) {
40916 GemmMicrokernelTester()
40917 .mr(3)
40918 .nr(4)
40919 .kr(2)
40920 .sr(4)
40921 .m(3)
40922 .n(4)
40923 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080040924 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040925 }
40926 }
40927
40928 TEST(QS8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
40929 for (size_t k = 1; k < 8; k++) {
40930 GemmMicrokernelTester()
40931 .mr(3)
40932 .nr(4)
40933 .kr(2)
40934 .sr(4)
40935 .m(3)
40936 .n(4)
40937 .k(k)
40938 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080040939 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040940 }
40941 }
40942
40943 TEST(QS8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
40944 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080040945 for (uint32_t n = 1; n <= 4; n++) {
40946 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040947 GemmMicrokernelTester()
40948 .mr(3)
40949 .nr(4)
40950 .kr(2)
40951 .sr(4)
40952 .m(m)
40953 .n(n)
40954 .k(k)
40955 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080040956 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040957 }
40958 }
40959 }
40960 }
40961
40962 TEST(QS8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
40963 for (size_t k = 9; k < 16; k++) {
40964 GemmMicrokernelTester()
40965 .mr(3)
40966 .nr(4)
40967 .kr(2)
40968 .sr(4)
40969 .m(3)
40970 .n(4)
40971 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080040972 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040973 }
40974 }
40975
40976 TEST(QS8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
40977 for (size_t k = 9; k < 16; k++) {
40978 GemmMicrokernelTester()
40979 .mr(3)
40980 .nr(4)
40981 .kr(2)
40982 .sr(4)
40983 .m(3)
40984 .n(4)
40985 .k(k)
40986 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080040987 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040988 }
40989 }
40990
40991 TEST(QS8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
40992 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080040993 for (uint32_t n = 1; n <= 4; n++) {
40994 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0f1ed942021-12-08 23:25:50 -080040995 GemmMicrokernelTester()
40996 .mr(3)
40997 .nr(4)
40998 .kr(2)
40999 .sr(4)
41000 .m(m)
41001 .n(n)
41002 .k(k)
41003 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041004 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080041005 }
41006 }
41007 }
41008 }
41009
41010 TEST(QS8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
41011 for (size_t k = 16; k <= 80; k += 8) {
41012 GemmMicrokernelTester()
41013 .mr(3)
41014 .nr(4)
41015 .kr(2)
41016 .sr(4)
41017 .m(3)
41018 .n(4)
41019 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080041020 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080041021 }
41022 }
41023
41024 TEST(QS8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
41025 for (size_t k = 16; k <= 80; k += 8) {
41026 GemmMicrokernelTester()
41027 .mr(3)
41028 .nr(4)
41029 .kr(2)
41030 .sr(4)
41031 .m(3)
41032 .n(4)
41033 .k(k)
41034 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080041035 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080041036 }
41037 }
41038
41039 TEST(QS8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
41040 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080041041 for (uint32_t n = 1; n <= 4; n++) {
41042 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0f1ed942021-12-08 23:25:50 -080041043 GemmMicrokernelTester()
41044 .mr(3)
41045 .nr(4)
41046 .kr(2)
41047 .sr(4)
41048 .m(m)
41049 .n(n)
41050 .k(k)
41051 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041052 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080041053 }
41054 }
41055 }
41056 }
41057
41058 TEST(QS8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
41059 for (uint32_t n = 5; n < 8; n++) {
41060 for (size_t k = 1; k <= 40; k += 9) {
41061 GemmMicrokernelTester()
41062 .mr(3)
41063 .nr(4)
41064 .kr(2)
41065 .sr(4)
41066 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080041067 .n(n)
Marat Dukhan0f1ed942021-12-08 23:25:50 -080041068 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080041069 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080041070 }
41071 }
41072 }
41073
41074 TEST(QS8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
41075 for (uint32_t n = 5; n < 8; n++) {
41076 for (size_t k = 1; k <= 40; k += 9) {
41077 GemmMicrokernelTester()
41078 .mr(3)
41079 .nr(4)
41080 .kr(2)
41081 .sr(4)
41082 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080041083 .n(n)
Marat Dukhan0f1ed942021-12-08 23:25:50 -080041084 .k(k)
41085 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080041086 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080041087 }
41088 }
41089 }
41090
41091 TEST(QS8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
41092 for (uint32_t n = 5; n < 8; n++) {
41093 for (size_t k = 1; k <= 40; k += 9) {
41094 GemmMicrokernelTester()
41095 .mr(3)
41096 .nr(4)
41097 .kr(2)
41098 .sr(4)
41099 .m(3)
41100 .n(n)
41101 .k(k)
41102 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080041103 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080041104 }
41105 }
41106 }
41107
41108 TEST(QS8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
41109 for (uint32_t n = 5; n < 8; n++) {
41110 for (size_t k = 1; k <= 40; k += 9) {
41111 for (uint32_t m = 1; m <= 3; m++) {
41112 GemmMicrokernelTester()
41113 .mr(3)
41114 .nr(4)
41115 .kr(2)
41116 .sr(4)
41117 .m(m)
41118 .n(n)
41119 .k(k)
41120 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041121 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080041122 }
41123 }
41124 }
41125 }
41126
41127 TEST(QS8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
41128 for (uint32_t n = 8; n <= 12; n += 4) {
41129 for (size_t k = 1; k <= 40; k += 9) {
41130 GemmMicrokernelTester()
41131 .mr(3)
41132 .nr(4)
41133 .kr(2)
41134 .sr(4)
41135 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080041136 .n(n)
Marat Dukhan0f1ed942021-12-08 23:25:50 -080041137 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080041138 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080041139 }
41140 }
41141 }
41142
41143 TEST(QS8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
41144 for (uint32_t n = 8; n <= 12; n += 4) {
41145 for (size_t k = 1; k <= 40; k += 9) {
41146 GemmMicrokernelTester()
41147 .mr(3)
41148 .nr(4)
41149 .kr(2)
41150 .sr(4)
41151 .m(3)
41152 .n(n)
41153 .k(k)
41154 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080041155 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080041156 }
41157 }
41158 }
41159
41160 TEST(QS8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
41161 for (uint32_t n = 8; n <= 12; n += 4) {
41162 for (size_t k = 1; k <= 40; k += 9) {
41163 GemmMicrokernelTester()
41164 .mr(3)
41165 .nr(4)
41166 .kr(2)
41167 .sr(4)
41168 .m(3)
41169 .n(n)
41170 .k(k)
41171 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080041172 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080041173 }
41174 }
41175 }
41176
41177 TEST(QS8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
41178 for (uint32_t n = 8; n <= 12; n += 4) {
41179 for (size_t k = 1; k <= 40; k += 9) {
41180 for (uint32_t m = 1; m <= 3; m++) {
41181 GemmMicrokernelTester()
41182 .mr(3)
41183 .nr(4)
41184 .kr(2)
41185 .sr(4)
41186 .m(m)
41187 .n(n)
41188 .k(k)
41189 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041190 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080041191 }
41192 }
41193 }
41194 }
41195
41196 TEST(QS8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
41197 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080041198 for (uint32_t n = 1; n <= 4; n++) {
41199 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan0f1ed942021-12-08 23:25:50 -080041200 GemmMicrokernelTester()
41201 .mr(3)
41202 .nr(4)
41203 .kr(2)
41204 .sr(4)
41205 .m(m)
41206 .n(n)
41207 .k(k)
41208 .cm_stride(7)
41209 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041210 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080041211 }
41212 }
41213 }
41214 }
41215
41216 TEST(QS8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
41217 GemmMicrokernelTester()
41218 .mr(3)
41219 .nr(4)
41220 .kr(2)
41221 .sr(4)
41222 .m(3)
41223 .n(4)
41224 .k(8)
41225 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080041226 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080041227 }
41228
41229 TEST(QS8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
41230 GemmMicrokernelTester()
41231 .mr(3)
41232 .nr(4)
41233 .kr(2)
41234 .sr(4)
41235 .m(3)
41236 .n(4)
41237 .k(8)
41238 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080041239 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080041240 }
41241
41242 TEST(QS8_GEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
41243 GemmMicrokernelTester()
41244 .mr(3)
41245 .nr(4)
41246 .kr(2)
41247 .sr(4)
41248 .m(3)
41249 .n(4)
41250 .k(8)
41251 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080041252 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan0f1ed942021-12-08 23:25:50 -080041253 }
Marat Dukhan4c617792021-12-21 15:47:58 -080041254#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan0f1ed942021-12-08 23:25:50 -080041255
41256
Marat Dukhan4c617792021-12-21 15:47:58 -080041257#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041258 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
41259 GemmMicrokernelTester()
41260 .mr(2)
41261 .nr(4)
41262 .kr(8)
41263 .sr(1)
41264 .m(2)
41265 .n(4)
41266 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080041267 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041268 }
41269
41270 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
41271 GemmMicrokernelTester()
41272 .mr(2)
41273 .nr(4)
41274 .kr(8)
41275 .sr(1)
41276 .m(2)
41277 .n(4)
41278 .k(8)
41279 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080041280 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041281 }
41282
41283 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_strided_a) {
41284 GemmMicrokernelTester()
41285 .mr(2)
41286 .nr(4)
41287 .kr(8)
41288 .sr(1)
41289 .m(2)
41290 .n(4)
41291 .k(8)
41292 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080041293 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041294 }
41295
41296 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080041297 for (uint32_t n = 1; n <= 4; n++) {
41298 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041299 GemmMicrokernelTester()
41300 .mr(2)
41301 .nr(4)
41302 .kr(8)
41303 .sr(1)
41304 .m(m)
41305 .n(n)
41306 .k(8)
41307 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041308 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041309 }
41310 }
41311 }
41312
41313 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
41314 for (uint32_t m = 1; m <= 2; m++) {
41315 GemmMicrokernelTester()
41316 .mr(2)
41317 .nr(4)
41318 .kr(8)
41319 .sr(1)
41320 .m(m)
41321 .n(4)
41322 .k(8)
41323 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041324 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041325 }
41326 }
41327
41328 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
41329 for (uint32_t n = 1; n <= 4; n++) {
41330 GemmMicrokernelTester()
41331 .mr(2)
41332 .nr(4)
41333 .kr(8)
41334 .sr(1)
41335 .m(2)
41336 .n(n)
41337 .k(8)
41338 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041339 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041340 }
41341 }
41342
41343 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
41344 for (size_t k = 1; k < 8; k++) {
41345 GemmMicrokernelTester()
41346 .mr(2)
41347 .nr(4)
41348 .kr(8)
41349 .sr(1)
41350 .m(2)
41351 .n(4)
41352 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080041353 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041354 }
41355 }
41356
41357 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_strided_a) {
41358 for (size_t k = 1; k < 8; k++) {
41359 GemmMicrokernelTester()
41360 .mr(2)
41361 .nr(4)
41362 .kr(8)
41363 .sr(1)
41364 .m(2)
41365 .n(4)
41366 .k(k)
41367 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080041368 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041369 }
41370 }
41371
41372 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
41373 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080041374 for (uint32_t n = 1; n <= 4; n++) {
41375 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041376 GemmMicrokernelTester()
41377 .mr(2)
41378 .nr(4)
41379 .kr(8)
41380 .sr(1)
41381 .m(m)
41382 .n(n)
41383 .k(k)
41384 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041385 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041386 }
41387 }
41388 }
41389 }
41390
41391 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
41392 for (size_t k = 9; k < 16; k++) {
41393 GemmMicrokernelTester()
41394 .mr(2)
41395 .nr(4)
41396 .kr(8)
41397 .sr(1)
41398 .m(2)
41399 .n(4)
41400 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080041401 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041402 }
41403 }
41404
41405 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_strided_a) {
41406 for (size_t k = 9; k < 16; k++) {
41407 GemmMicrokernelTester()
41408 .mr(2)
41409 .nr(4)
41410 .kr(8)
41411 .sr(1)
41412 .m(2)
41413 .n(4)
41414 .k(k)
41415 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080041416 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041417 }
41418 }
41419
41420 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
41421 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080041422 for (uint32_t n = 1; n <= 4; n++) {
41423 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041424 GemmMicrokernelTester()
41425 .mr(2)
41426 .nr(4)
41427 .kr(8)
41428 .sr(1)
41429 .m(m)
41430 .n(n)
41431 .k(k)
41432 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041433 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041434 }
41435 }
41436 }
41437 }
41438
41439 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
41440 for (size_t k = 16; k <= 80; k += 8) {
41441 GemmMicrokernelTester()
41442 .mr(2)
41443 .nr(4)
41444 .kr(8)
41445 .sr(1)
41446 .m(2)
41447 .n(4)
41448 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080041449 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041450 }
41451 }
41452
41453 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_strided_a) {
41454 for (size_t k = 16; k <= 80; k += 8) {
41455 GemmMicrokernelTester()
41456 .mr(2)
41457 .nr(4)
41458 .kr(8)
41459 .sr(1)
41460 .m(2)
41461 .n(4)
41462 .k(k)
41463 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080041464 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041465 }
41466 }
41467
41468 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
41469 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080041470 for (uint32_t n = 1; n <= 4; n++) {
41471 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041472 GemmMicrokernelTester()
41473 .mr(2)
41474 .nr(4)
41475 .kr(8)
41476 .sr(1)
41477 .m(m)
41478 .n(n)
41479 .k(k)
41480 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041481 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041482 }
41483 }
41484 }
41485 }
41486
41487 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
41488 for (uint32_t n = 5; n < 8; n++) {
41489 for (size_t k = 1; k <= 40; k += 9) {
41490 GemmMicrokernelTester()
41491 .mr(2)
41492 .nr(4)
41493 .kr(8)
41494 .sr(1)
41495 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080041496 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041497 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080041498 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041499 }
41500 }
41501 }
41502
41503 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
41504 for (uint32_t n = 5; n < 8; n++) {
41505 for (size_t k = 1; k <= 40; k += 9) {
41506 GemmMicrokernelTester()
41507 .mr(2)
41508 .nr(4)
41509 .kr(8)
41510 .sr(1)
41511 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080041512 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041513 .k(k)
41514 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080041515 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041516 }
41517 }
41518 }
41519
41520 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_a) {
41521 for (uint32_t n = 5; n < 8; n++) {
41522 for (size_t k = 1; k <= 40; k += 9) {
41523 GemmMicrokernelTester()
41524 .mr(2)
41525 .nr(4)
41526 .kr(8)
41527 .sr(1)
41528 .m(2)
41529 .n(n)
41530 .k(k)
41531 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080041532 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041533 }
41534 }
41535 }
41536
41537 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
41538 for (uint32_t n = 5; n < 8; n++) {
41539 for (size_t k = 1; k <= 40; k += 9) {
41540 for (uint32_t m = 1; m <= 2; m++) {
41541 GemmMicrokernelTester()
41542 .mr(2)
41543 .nr(4)
41544 .kr(8)
41545 .sr(1)
41546 .m(m)
41547 .n(n)
41548 .k(k)
41549 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041550 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041551 }
41552 }
41553 }
41554 }
41555
41556 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
41557 for (uint32_t n = 8; n <= 12; n += 4) {
41558 for (size_t k = 1; k <= 40; k += 9) {
41559 GemmMicrokernelTester()
41560 .mr(2)
41561 .nr(4)
41562 .kr(8)
41563 .sr(1)
41564 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080041565 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041566 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080041567 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041568 }
41569 }
41570 }
41571
41572 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
41573 for (uint32_t n = 8; n <= 12; n += 4) {
41574 for (size_t k = 1; k <= 40; k += 9) {
41575 GemmMicrokernelTester()
41576 .mr(2)
41577 .nr(4)
41578 .kr(8)
41579 .sr(1)
41580 .m(2)
41581 .n(n)
41582 .k(k)
41583 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080041584 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041585 }
41586 }
41587 }
41588
41589 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_a) {
41590 for (uint32_t n = 8; n <= 12; n += 4) {
41591 for (size_t k = 1; k <= 40; k += 9) {
41592 GemmMicrokernelTester()
41593 .mr(2)
41594 .nr(4)
41595 .kr(8)
41596 .sr(1)
41597 .m(2)
41598 .n(n)
41599 .k(k)
41600 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080041601 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041602 }
41603 }
41604 }
41605
41606 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
41607 for (uint32_t n = 8; n <= 12; n += 4) {
41608 for (size_t k = 1; k <= 40; k += 9) {
41609 for (uint32_t m = 1; m <= 2; m++) {
41610 GemmMicrokernelTester()
41611 .mr(2)
41612 .nr(4)
41613 .kr(8)
41614 .sr(1)
41615 .m(m)
41616 .n(n)
41617 .k(k)
41618 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041619 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041620 }
41621 }
41622 }
41623 }
41624
41625 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
41626 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080041627 for (uint32_t n = 1; n <= 4; n++) {
41628 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041629 GemmMicrokernelTester()
41630 .mr(2)
41631 .nr(4)
41632 .kr(8)
41633 .sr(1)
41634 .m(m)
41635 .n(n)
41636 .k(k)
41637 .cm_stride(7)
41638 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041639 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041640 }
41641 }
41642 }
41643 }
41644
41645 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
41646 GemmMicrokernelTester()
41647 .mr(2)
41648 .nr(4)
41649 .kr(8)
41650 .sr(1)
41651 .m(2)
41652 .n(4)
41653 .k(8)
41654 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080041655 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041656 }
41657
41658 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
41659 GemmMicrokernelTester()
41660 .mr(2)
41661 .nr(4)
41662 .kr(8)
41663 .sr(1)
41664 .m(2)
41665 .n(4)
41666 .k(8)
41667 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080041668 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041669 }
41670
41671 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
41672 GemmMicrokernelTester()
41673 .mr(2)
41674 .nr(4)
41675 .kr(8)
41676 .sr(1)
41677 .m(2)
41678 .n(4)
41679 .k(8)
41680 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080041681 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041682 }
Marat Dukhan4c617792021-12-21 15:47:58 -080041683#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041684
41685
Marat Dukhan4c617792021-12-21 15:47:58 -080041686#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041687 TEST(QS8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
41688 GemmMicrokernelTester()
41689 .mr(4)
41690 .nr(4)
41691 .kr(8)
41692 .sr(1)
41693 .m(4)
41694 .n(4)
41695 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080041696 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041697 }
41698
41699 TEST(QS8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
41700 GemmMicrokernelTester()
41701 .mr(4)
41702 .nr(4)
41703 .kr(8)
41704 .sr(1)
41705 .m(4)
41706 .n(4)
41707 .k(8)
41708 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080041709 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041710 }
41711
41712 TEST(QS8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_strided_a) {
41713 GemmMicrokernelTester()
41714 .mr(4)
41715 .nr(4)
41716 .kr(8)
41717 .sr(1)
41718 .m(4)
41719 .n(4)
41720 .k(8)
41721 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080041722 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041723 }
41724
41725 TEST(QS8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080041726 for (uint32_t n = 1; n <= 4; n++) {
41727 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041728 GemmMicrokernelTester()
41729 .mr(4)
41730 .nr(4)
41731 .kr(8)
41732 .sr(1)
41733 .m(m)
41734 .n(n)
41735 .k(8)
41736 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041737 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041738 }
41739 }
41740 }
41741
41742 TEST(QS8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
41743 for (uint32_t m = 1; m <= 4; m++) {
41744 GemmMicrokernelTester()
41745 .mr(4)
41746 .nr(4)
41747 .kr(8)
41748 .sr(1)
41749 .m(m)
41750 .n(4)
41751 .k(8)
41752 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041753 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041754 }
41755 }
41756
41757 TEST(QS8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
41758 for (uint32_t n = 1; n <= 4; n++) {
41759 GemmMicrokernelTester()
41760 .mr(4)
41761 .nr(4)
41762 .kr(8)
41763 .sr(1)
41764 .m(4)
41765 .n(n)
41766 .k(8)
41767 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041768 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041769 }
41770 }
41771
41772 TEST(QS8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
41773 for (size_t k = 1; k < 8; k++) {
41774 GemmMicrokernelTester()
41775 .mr(4)
41776 .nr(4)
41777 .kr(8)
41778 .sr(1)
41779 .m(4)
41780 .n(4)
41781 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080041782 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041783 }
41784 }
41785
41786 TEST(QS8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_strided_a) {
41787 for (size_t k = 1; k < 8; k++) {
41788 GemmMicrokernelTester()
41789 .mr(4)
41790 .nr(4)
41791 .kr(8)
41792 .sr(1)
41793 .m(4)
41794 .n(4)
41795 .k(k)
41796 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080041797 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041798 }
41799 }
41800
41801 TEST(QS8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
41802 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080041803 for (uint32_t n = 1; n <= 4; n++) {
41804 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041805 GemmMicrokernelTester()
41806 .mr(4)
41807 .nr(4)
41808 .kr(8)
41809 .sr(1)
41810 .m(m)
41811 .n(n)
41812 .k(k)
41813 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041814 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041815 }
41816 }
41817 }
41818 }
41819
41820 TEST(QS8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
41821 for (size_t k = 9; k < 16; k++) {
41822 GemmMicrokernelTester()
41823 .mr(4)
41824 .nr(4)
41825 .kr(8)
41826 .sr(1)
41827 .m(4)
41828 .n(4)
41829 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080041830 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041831 }
41832 }
41833
41834 TEST(QS8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_strided_a) {
41835 for (size_t k = 9; k < 16; k++) {
41836 GemmMicrokernelTester()
41837 .mr(4)
41838 .nr(4)
41839 .kr(8)
41840 .sr(1)
41841 .m(4)
41842 .n(4)
41843 .k(k)
41844 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080041845 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041846 }
41847 }
41848
41849 TEST(QS8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
41850 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080041851 for (uint32_t n = 1; n <= 4; n++) {
41852 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041853 GemmMicrokernelTester()
41854 .mr(4)
41855 .nr(4)
41856 .kr(8)
41857 .sr(1)
41858 .m(m)
41859 .n(n)
41860 .k(k)
41861 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041862 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041863 }
41864 }
41865 }
41866 }
41867
41868 TEST(QS8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
41869 for (size_t k = 16; k <= 80; k += 8) {
41870 GemmMicrokernelTester()
41871 .mr(4)
41872 .nr(4)
41873 .kr(8)
41874 .sr(1)
41875 .m(4)
41876 .n(4)
41877 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080041878 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041879 }
41880 }
41881
41882 TEST(QS8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_strided_a) {
41883 for (size_t k = 16; k <= 80; k += 8) {
41884 GemmMicrokernelTester()
41885 .mr(4)
41886 .nr(4)
41887 .kr(8)
41888 .sr(1)
41889 .m(4)
41890 .n(4)
41891 .k(k)
41892 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080041893 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041894 }
41895 }
41896
41897 TEST(QS8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
41898 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080041899 for (uint32_t n = 1; n <= 4; n++) {
41900 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041901 GemmMicrokernelTester()
41902 .mr(4)
41903 .nr(4)
41904 .kr(8)
41905 .sr(1)
41906 .m(m)
41907 .n(n)
41908 .k(k)
41909 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041910 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041911 }
41912 }
41913 }
41914 }
41915
41916 TEST(QS8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
41917 for (uint32_t n = 5; n < 8; n++) {
41918 for (size_t k = 1; k <= 40; k += 9) {
41919 GemmMicrokernelTester()
41920 .mr(4)
41921 .nr(4)
41922 .kr(8)
41923 .sr(1)
41924 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080041925 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041926 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080041927 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041928 }
41929 }
41930 }
41931
41932 TEST(QS8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
41933 for (uint32_t n = 5; n < 8; n++) {
41934 for (size_t k = 1; k <= 40; k += 9) {
41935 GemmMicrokernelTester()
41936 .mr(4)
41937 .nr(4)
41938 .kr(8)
41939 .sr(1)
41940 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080041941 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041942 .k(k)
41943 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080041944 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041945 }
41946 }
41947 }
41948
41949 TEST(QS8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_a) {
41950 for (uint32_t n = 5; n < 8; n++) {
41951 for (size_t k = 1; k <= 40; k += 9) {
41952 GemmMicrokernelTester()
41953 .mr(4)
41954 .nr(4)
41955 .kr(8)
41956 .sr(1)
41957 .m(4)
41958 .n(n)
41959 .k(k)
41960 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080041961 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041962 }
41963 }
41964 }
41965
41966 TEST(QS8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
41967 for (uint32_t n = 5; n < 8; n++) {
41968 for (size_t k = 1; k <= 40; k += 9) {
41969 for (uint32_t m = 1; m <= 4; m++) {
41970 GemmMicrokernelTester()
41971 .mr(4)
41972 .nr(4)
41973 .kr(8)
41974 .sr(1)
41975 .m(m)
41976 .n(n)
41977 .k(k)
41978 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080041979 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041980 }
41981 }
41982 }
41983 }
41984
41985 TEST(QS8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
41986 for (uint32_t n = 8; n <= 12; n += 4) {
41987 for (size_t k = 1; k <= 40; k += 9) {
41988 GemmMicrokernelTester()
41989 .mr(4)
41990 .nr(4)
41991 .kr(8)
41992 .sr(1)
41993 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080041994 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041995 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080041996 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070041997 }
41998 }
41999 }
42000
42001 TEST(QS8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
42002 for (uint32_t n = 8; n <= 12; n += 4) {
42003 for (size_t k = 1; k <= 40; k += 9) {
42004 GemmMicrokernelTester()
42005 .mr(4)
42006 .nr(4)
42007 .kr(8)
42008 .sr(1)
42009 .m(4)
42010 .n(n)
42011 .k(k)
42012 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080042013 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042014 }
42015 }
42016 }
42017
42018 TEST(QS8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_a) {
42019 for (uint32_t n = 8; n <= 12; n += 4) {
42020 for (size_t k = 1; k <= 40; k += 9) {
42021 GemmMicrokernelTester()
42022 .mr(4)
42023 .nr(4)
42024 .kr(8)
42025 .sr(1)
42026 .m(4)
42027 .n(n)
42028 .k(k)
42029 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080042030 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042031 }
42032 }
42033 }
42034
42035 TEST(QS8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
42036 for (uint32_t n = 8; n <= 12; n += 4) {
42037 for (size_t k = 1; k <= 40; k += 9) {
42038 for (uint32_t m = 1; m <= 4; m++) {
42039 GemmMicrokernelTester()
42040 .mr(4)
42041 .nr(4)
42042 .kr(8)
42043 .sr(1)
42044 .m(m)
42045 .n(n)
42046 .k(k)
42047 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042048 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042049 }
42050 }
42051 }
42052 }
42053
42054 TEST(QS8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
42055 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080042056 for (uint32_t n = 1; n <= 4; n++) {
42057 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042058 GemmMicrokernelTester()
42059 .mr(4)
42060 .nr(4)
42061 .kr(8)
42062 .sr(1)
42063 .m(m)
42064 .n(n)
42065 .k(k)
42066 .cm_stride(7)
42067 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042068 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042069 }
42070 }
42071 }
42072 }
42073
42074 TEST(QS8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
42075 GemmMicrokernelTester()
42076 .mr(4)
42077 .nr(4)
42078 .kr(8)
42079 .sr(1)
42080 .m(4)
42081 .n(4)
42082 .k(8)
42083 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080042084 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042085 }
42086
42087 TEST(QS8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
42088 GemmMicrokernelTester()
42089 .mr(4)
42090 .nr(4)
42091 .kr(8)
42092 .sr(1)
42093 .m(4)
42094 .n(4)
42095 .k(8)
42096 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080042097 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042098 }
42099
42100 TEST(QS8_GEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
42101 GemmMicrokernelTester()
42102 .mr(4)
42103 .nr(4)
42104 .kr(8)
42105 .sr(1)
42106 .m(4)
42107 .n(4)
42108 .k(8)
42109 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080042110 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042111 }
Marat Dukhan4c617792021-12-21 15:47:58 -080042112#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042113
42114
Marat Dukhan4c617792021-12-21 15:47:58 -080042115#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042116 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
42117 GemmMicrokernelTester()
42118 .mr(1)
42119 .nr(4)
42120 .kr(8)
42121 .sr(1)
42122 .m(1)
42123 .n(4)
42124 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080042125 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042126 }
42127
42128 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
42129 GemmMicrokernelTester()
42130 .mr(1)
42131 .nr(4)
42132 .kr(8)
42133 .sr(1)
42134 .m(1)
42135 .n(4)
42136 .k(8)
42137 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080042138 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042139 }
42140
42141 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
42142 GemmMicrokernelTester()
42143 .mr(1)
42144 .nr(4)
42145 .kr(8)
42146 .sr(1)
42147 .m(1)
42148 .n(4)
42149 .k(8)
42150 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080042151 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042152 }
42153
42154 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080042155 for (uint32_t n = 1; n <= 4; n++) {
42156 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042157 GemmMicrokernelTester()
42158 .mr(1)
42159 .nr(4)
42160 .kr(8)
42161 .sr(1)
42162 .m(m)
42163 .n(n)
42164 .k(8)
42165 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042166 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042167 }
42168 }
42169 }
42170
42171 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
42172 for (uint32_t m = 1; m <= 1; m++) {
42173 GemmMicrokernelTester()
42174 .mr(1)
42175 .nr(4)
42176 .kr(8)
42177 .sr(1)
42178 .m(m)
42179 .n(4)
42180 .k(8)
42181 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042182 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042183 }
42184 }
42185
42186 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
42187 for (uint32_t n = 1; n <= 4; n++) {
42188 GemmMicrokernelTester()
42189 .mr(1)
42190 .nr(4)
42191 .kr(8)
42192 .sr(1)
42193 .m(1)
42194 .n(n)
42195 .k(8)
42196 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042197 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042198 }
42199 }
42200
42201 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
42202 for (size_t k = 1; k < 8; k++) {
42203 GemmMicrokernelTester()
42204 .mr(1)
42205 .nr(4)
42206 .kr(8)
42207 .sr(1)
42208 .m(1)
42209 .n(4)
42210 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080042211 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042212 }
42213 }
42214
42215 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
42216 for (size_t k = 1; k < 8; k++) {
42217 GemmMicrokernelTester()
42218 .mr(1)
42219 .nr(4)
42220 .kr(8)
42221 .sr(1)
42222 .m(1)
42223 .n(4)
42224 .k(k)
42225 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080042226 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042227 }
42228 }
42229
42230 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
42231 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080042232 for (uint32_t n = 1; n <= 4; n++) {
42233 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042234 GemmMicrokernelTester()
42235 .mr(1)
42236 .nr(4)
42237 .kr(8)
42238 .sr(1)
42239 .m(m)
42240 .n(n)
42241 .k(k)
42242 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042243 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042244 }
42245 }
42246 }
42247 }
42248
42249 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
42250 for (size_t k = 9; k < 16; k++) {
42251 GemmMicrokernelTester()
42252 .mr(1)
42253 .nr(4)
42254 .kr(8)
42255 .sr(1)
42256 .m(1)
42257 .n(4)
42258 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080042259 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042260 }
42261 }
42262
42263 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
42264 for (size_t k = 9; k < 16; k++) {
42265 GemmMicrokernelTester()
42266 .mr(1)
42267 .nr(4)
42268 .kr(8)
42269 .sr(1)
42270 .m(1)
42271 .n(4)
42272 .k(k)
42273 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080042274 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042275 }
42276 }
42277
42278 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
42279 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080042280 for (uint32_t n = 1; n <= 4; n++) {
42281 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042282 GemmMicrokernelTester()
42283 .mr(1)
42284 .nr(4)
42285 .kr(8)
42286 .sr(1)
42287 .m(m)
42288 .n(n)
42289 .k(k)
42290 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042291 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042292 }
42293 }
42294 }
42295 }
42296
42297 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
42298 for (size_t k = 16; k <= 80; k += 8) {
42299 GemmMicrokernelTester()
42300 .mr(1)
42301 .nr(4)
42302 .kr(8)
42303 .sr(1)
42304 .m(1)
42305 .n(4)
42306 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080042307 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042308 }
42309 }
42310
42311 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
42312 for (size_t k = 16; k <= 80; k += 8) {
42313 GemmMicrokernelTester()
42314 .mr(1)
42315 .nr(4)
42316 .kr(8)
42317 .sr(1)
42318 .m(1)
42319 .n(4)
42320 .k(k)
42321 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080042322 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042323 }
42324 }
42325
42326 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
42327 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080042328 for (uint32_t n = 1; n <= 4; n++) {
42329 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042330 GemmMicrokernelTester()
42331 .mr(1)
42332 .nr(4)
42333 .kr(8)
42334 .sr(1)
42335 .m(m)
42336 .n(n)
42337 .k(k)
42338 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042339 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042340 }
42341 }
42342 }
42343 }
42344
42345 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
42346 for (uint32_t n = 5; n < 8; n++) {
42347 for (size_t k = 1; k <= 40; k += 9) {
42348 GemmMicrokernelTester()
42349 .mr(1)
42350 .nr(4)
42351 .kr(8)
42352 .sr(1)
42353 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080042354 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042355 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080042356 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042357 }
42358 }
42359 }
42360
42361 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
42362 for (uint32_t n = 5; n < 8; n++) {
42363 for (size_t k = 1; k <= 40; k += 9) {
42364 GemmMicrokernelTester()
42365 .mr(1)
42366 .nr(4)
42367 .kr(8)
42368 .sr(1)
42369 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080042370 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042371 .k(k)
42372 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080042373 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042374 }
42375 }
42376 }
42377
42378 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
42379 for (uint32_t n = 5; n < 8; n++) {
42380 for (size_t k = 1; k <= 40; k += 9) {
42381 GemmMicrokernelTester()
42382 .mr(1)
42383 .nr(4)
42384 .kr(8)
42385 .sr(1)
42386 .m(1)
42387 .n(n)
42388 .k(k)
42389 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080042390 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042391 }
42392 }
42393 }
42394
42395 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
42396 for (uint32_t n = 5; n < 8; n++) {
42397 for (size_t k = 1; k <= 40; k += 9) {
42398 for (uint32_t m = 1; m <= 1; m++) {
42399 GemmMicrokernelTester()
42400 .mr(1)
42401 .nr(4)
42402 .kr(8)
42403 .sr(1)
42404 .m(m)
42405 .n(n)
42406 .k(k)
42407 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042408 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042409 }
42410 }
42411 }
42412 }
42413
42414 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
42415 for (uint32_t n = 8; n <= 12; n += 4) {
42416 for (size_t k = 1; k <= 40; k += 9) {
42417 GemmMicrokernelTester()
42418 .mr(1)
42419 .nr(4)
42420 .kr(8)
42421 .sr(1)
42422 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080042423 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042424 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080042425 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042426 }
42427 }
42428 }
42429
42430 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
42431 for (uint32_t n = 8; n <= 12; n += 4) {
42432 for (size_t k = 1; k <= 40; k += 9) {
42433 GemmMicrokernelTester()
42434 .mr(1)
42435 .nr(4)
42436 .kr(8)
42437 .sr(1)
42438 .m(1)
42439 .n(n)
42440 .k(k)
42441 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080042442 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042443 }
42444 }
42445 }
42446
42447 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
42448 for (uint32_t n = 8; n <= 12; n += 4) {
42449 for (size_t k = 1; k <= 40; k += 9) {
42450 GemmMicrokernelTester()
42451 .mr(1)
42452 .nr(4)
42453 .kr(8)
42454 .sr(1)
42455 .m(1)
42456 .n(n)
42457 .k(k)
42458 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080042459 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042460 }
42461 }
42462 }
42463
42464 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
42465 for (uint32_t n = 8; n <= 12; n += 4) {
42466 for (size_t k = 1; k <= 40; k += 9) {
42467 for (uint32_t m = 1; m <= 1; m++) {
42468 GemmMicrokernelTester()
42469 .mr(1)
42470 .nr(4)
42471 .kr(8)
42472 .sr(1)
42473 .m(m)
42474 .n(n)
42475 .k(k)
42476 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042477 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042478 }
42479 }
42480 }
42481 }
42482
42483 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
42484 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080042485 for (uint32_t n = 1; n <= 4; n++) {
42486 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042487 GemmMicrokernelTester()
42488 .mr(1)
42489 .nr(4)
42490 .kr(8)
42491 .sr(1)
42492 .m(m)
42493 .n(n)
42494 .k(k)
42495 .cm_stride(7)
42496 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042497 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042498 }
42499 }
42500 }
42501 }
42502
42503 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
42504 GemmMicrokernelTester()
42505 .mr(1)
42506 .nr(4)
42507 .kr(8)
42508 .sr(1)
42509 .m(1)
42510 .n(4)
42511 .k(8)
42512 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080042513 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042514 }
42515
42516 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
42517 GemmMicrokernelTester()
42518 .mr(1)
42519 .nr(4)
42520 .kr(8)
42521 .sr(1)
42522 .m(1)
42523 .n(4)
42524 .k(8)
42525 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080042526 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042527 }
42528
42529 TEST(QS8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
42530 GemmMicrokernelTester()
42531 .mr(1)
42532 .nr(4)
42533 .kr(8)
42534 .sr(1)
42535 .m(1)
42536 .n(4)
42537 .k(8)
42538 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080042539 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042540 }
Marat Dukhan4c617792021-12-21 15:47:58 -080042541#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042542
42543
Marat Dukhan4c617792021-12-21 15:47:58 -080042544#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042545 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
42546 GemmMicrokernelTester()
42547 .mr(2)
42548 .nr(4)
42549 .kr(8)
42550 .sr(1)
42551 .m(2)
42552 .n(4)
42553 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080042554 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042555 }
42556
42557 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
42558 GemmMicrokernelTester()
42559 .mr(2)
42560 .nr(4)
42561 .kr(8)
42562 .sr(1)
42563 .m(2)
42564 .n(4)
42565 .k(8)
42566 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080042567 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042568 }
42569
42570 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
42571 GemmMicrokernelTester()
42572 .mr(2)
42573 .nr(4)
42574 .kr(8)
42575 .sr(1)
42576 .m(2)
42577 .n(4)
42578 .k(8)
42579 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080042580 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042581 }
42582
42583 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080042584 for (uint32_t n = 1; n <= 4; n++) {
42585 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042586 GemmMicrokernelTester()
42587 .mr(2)
42588 .nr(4)
42589 .kr(8)
42590 .sr(1)
42591 .m(m)
42592 .n(n)
42593 .k(8)
42594 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042595 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042596 }
42597 }
42598 }
42599
42600 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
42601 for (uint32_t m = 1; m <= 2; m++) {
42602 GemmMicrokernelTester()
42603 .mr(2)
42604 .nr(4)
42605 .kr(8)
42606 .sr(1)
42607 .m(m)
42608 .n(4)
42609 .k(8)
42610 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042611 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042612 }
42613 }
42614
42615 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
42616 for (uint32_t n = 1; n <= 4; n++) {
42617 GemmMicrokernelTester()
42618 .mr(2)
42619 .nr(4)
42620 .kr(8)
42621 .sr(1)
42622 .m(2)
42623 .n(n)
42624 .k(8)
42625 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042626 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042627 }
42628 }
42629
42630 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
42631 for (size_t k = 1; k < 8; k++) {
42632 GemmMicrokernelTester()
42633 .mr(2)
42634 .nr(4)
42635 .kr(8)
42636 .sr(1)
42637 .m(2)
42638 .n(4)
42639 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080042640 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042641 }
42642 }
42643
42644 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
42645 for (size_t k = 1; k < 8; k++) {
42646 GemmMicrokernelTester()
42647 .mr(2)
42648 .nr(4)
42649 .kr(8)
42650 .sr(1)
42651 .m(2)
42652 .n(4)
42653 .k(k)
42654 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080042655 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042656 }
42657 }
42658
42659 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
42660 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080042661 for (uint32_t n = 1; n <= 4; n++) {
42662 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042663 GemmMicrokernelTester()
42664 .mr(2)
42665 .nr(4)
42666 .kr(8)
42667 .sr(1)
42668 .m(m)
42669 .n(n)
42670 .k(k)
42671 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042672 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042673 }
42674 }
42675 }
42676 }
42677
42678 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
42679 for (size_t k = 9; k < 16; k++) {
42680 GemmMicrokernelTester()
42681 .mr(2)
42682 .nr(4)
42683 .kr(8)
42684 .sr(1)
42685 .m(2)
42686 .n(4)
42687 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080042688 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042689 }
42690 }
42691
42692 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
42693 for (size_t k = 9; k < 16; k++) {
42694 GemmMicrokernelTester()
42695 .mr(2)
42696 .nr(4)
42697 .kr(8)
42698 .sr(1)
42699 .m(2)
42700 .n(4)
42701 .k(k)
42702 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080042703 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042704 }
42705 }
42706
42707 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
42708 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080042709 for (uint32_t n = 1; n <= 4; n++) {
42710 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042711 GemmMicrokernelTester()
42712 .mr(2)
42713 .nr(4)
42714 .kr(8)
42715 .sr(1)
42716 .m(m)
42717 .n(n)
42718 .k(k)
42719 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042720 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042721 }
42722 }
42723 }
42724 }
42725
42726 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
42727 for (size_t k = 16; k <= 80; k += 8) {
42728 GemmMicrokernelTester()
42729 .mr(2)
42730 .nr(4)
42731 .kr(8)
42732 .sr(1)
42733 .m(2)
42734 .n(4)
42735 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080042736 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042737 }
42738 }
42739
42740 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
42741 for (size_t k = 16; k <= 80; k += 8) {
42742 GemmMicrokernelTester()
42743 .mr(2)
42744 .nr(4)
42745 .kr(8)
42746 .sr(1)
42747 .m(2)
42748 .n(4)
42749 .k(k)
42750 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080042751 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042752 }
42753 }
42754
42755 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
42756 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080042757 for (uint32_t n = 1; n <= 4; n++) {
42758 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042759 GemmMicrokernelTester()
42760 .mr(2)
42761 .nr(4)
42762 .kr(8)
42763 .sr(1)
42764 .m(m)
42765 .n(n)
42766 .k(k)
42767 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042768 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042769 }
42770 }
42771 }
42772 }
42773
42774 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
42775 for (uint32_t n = 5; n < 8; n++) {
42776 for (size_t k = 1; k <= 40; k += 9) {
42777 GemmMicrokernelTester()
42778 .mr(2)
42779 .nr(4)
42780 .kr(8)
42781 .sr(1)
42782 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080042783 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042784 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080042785 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042786 }
42787 }
42788 }
42789
42790 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
42791 for (uint32_t n = 5; n < 8; n++) {
42792 for (size_t k = 1; k <= 40; k += 9) {
42793 GemmMicrokernelTester()
42794 .mr(2)
42795 .nr(4)
42796 .kr(8)
42797 .sr(1)
42798 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080042799 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042800 .k(k)
42801 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080042802 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042803 }
42804 }
42805 }
42806
42807 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
42808 for (uint32_t n = 5; n < 8; n++) {
42809 for (size_t k = 1; k <= 40; k += 9) {
42810 GemmMicrokernelTester()
42811 .mr(2)
42812 .nr(4)
42813 .kr(8)
42814 .sr(1)
42815 .m(2)
42816 .n(n)
42817 .k(k)
42818 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080042819 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042820 }
42821 }
42822 }
42823
42824 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
42825 for (uint32_t n = 5; n < 8; n++) {
42826 for (size_t k = 1; k <= 40; k += 9) {
42827 for (uint32_t m = 1; m <= 2; m++) {
42828 GemmMicrokernelTester()
42829 .mr(2)
42830 .nr(4)
42831 .kr(8)
42832 .sr(1)
42833 .m(m)
42834 .n(n)
42835 .k(k)
42836 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042837 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042838 }
42839 }
42840 }
42841 }
42842
42843 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
42844 for (uint32_t n = 8; n <= 12; n += 4) {
42845 for (size_t k = 1; k <= 40; k += 9) {
42846 GemmMicrokernelTester()
42847 .mr(2)
42848 .nr(4)
42849 .kr(8)
42850 .sr(1)
42851 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080042852 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042853 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080042854 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042855 }
42856 }
42857 }
42858
42859 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
42860 for (uint32_t n = 8; n <= 12; n += 4) {
42861 for (size_t k = 1; k <= 40; k += 9) {
42862 GemmMicrokernelTester()
42863 .mr(2)
42864 .nr(4)
42865 .kr(8)
42866 .sr(1)
42867 .m(2)
42868 .n(n)
42869 .k(k)
42870 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080042871 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042872 }
42873 }
42874 }
42875
42876 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
42877 for (uint32_t n = 8; n <= 12; n += 4) {
42878 for (size_t k = 1; k <= 40; k += 9) {
42879 GemmMicrokernelTester()
42880 .mr(2)
42881 .nr(4)
42882 .kr(8)
42883 .sr(1)
42884 .m(2)
42885 .n(n)
42886 .k(k)
42887 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080042888 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042889 }
42890 }
42891 }
42892
42893 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
42894 for (uint32_t n = 8; n <= 12; n += 4) {
42895 for (size_t k = 1; k <= 40; k += 9) {
42896 for (uint32_t m = 1; m <= 2; m++) {
42897 GemmMicrokernelTester()
42898 .mr(2)
42899 .nr(4)
42900 .kr(8)
42901 .sr(1)
42902 .m(m)
42903 .n(n)
42904 .k(k)
42905 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042906 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042907 }
42908 }
42909 }
42910 }
42911
42912 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
42913 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080042914 for (uint32_t n = 1; n <= 4; n++) {
42915 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042916 GemmMicrokernelTester()
42917 .mr(2)
42918 .nr(4)
42919 .kr(8)
42920 .sr(1)
42921 .m(m)
42922 .n(n)
42923 .k(k)
42924 .cm_stride(7)
42925 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080042926 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042927 }
42928 }
42929 }
42930 }
42931
42932 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
42933 GemmMicrokernelTester()
42934 .mr(2)
42935 .nr(4)
42936 .kr(8)
42937 .sr(1)
42938 .m(2)
42939 .n(4)
42940 .k(8)
42941 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080042942 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042943 }
42944
42945 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
42946 GemmMicrokernelTester()
42947 .mr(2)
42948 .nr(4)
42949 .kr(8)
42950 .sr(1)
42951 .m(2)
42952 .n(4)
42953 .k(8)
42954 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080042955 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042956 }
42957
42958 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
42959 GemmMicrokernelTester()
42960 .mr(2)
42961 .nr(4)
42962 .kr(8)
42963 .sr(1)
42964 .m(2)
42965 .n(4)
42966 .k(8)
42967 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080042968 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042969 }
Marat Dukhan4c617792021-12-21 15:47:58 -080042970#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042971
42972
Marat Dukhan4c617792021-12-21 15:47:58 -080042973#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042974 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
42975 GemmMicrokernelTester()
42976 .mr(3)
42977 .nr(4)
42978 .kr(8)
42979 .sr(1)
42980 .m(3)
42981 .n(4)
42982 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080042983 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042984 }
42985
42986 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
42987 GemmMicrokernelTester()
42988 .mr(3)
42989 .nr(4)
42990 .kr(8)
42991 .sr(1)
42992 .m(3)
42993 .n(4)
42994 .k(8)
42995 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080042996 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070042997 }
42998
42999 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
43000 GemmMicrokernelTester()
43001 .mr(3)
43002 .nr(4)
43003 .kr(8)
43004 .sr(1)
43005 .m(3)
43006 .n(4)
43007 .k(8)
43008 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080043009 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043010 }
43011
43012 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080043013 for (uint32_t n = 1; n <= 4; n++) {
43014 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043015 GemmMicrokernelTester()
43016 .mr(3)
43017 .nr(4)
43018 .kr(8)
43019 .sr(1)
43020 .m(m)
43021 .n(n)
43022 .k(8)
43023 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043024 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043025 }
43026 }
43027 }
43028
43029 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
43030 for (uint32_t m = 1; m <= 3; m++) {
43031 GemmMicrokernelTester()
43032 .mr(3)
43033 .nr(4)
43034 .kr(8)
43035 .sr(1)
43036 .m(m)
43037 .n(4)
43038 .k(8)
43039 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043040 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043041 }
43042 }
43043
43044 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
43045 for (uint32_t n = 1; n <= 4; n++) {
43046 GemmMicrokernelTester()
43047 .mr(3)
43048 .nr(4)
43049 .kr(8)
43050 .sr(1)
43051 .m(3)
43052 .n(n)
43053 .k(8)
43054 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043055 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043056 }
43057 }
43058
43059 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
43060 for (size_t k = 1; k < 8; k++) {
43061 GemmMicrokernelTester()
43062 .mr(3)
43063 .nr(4)
43064 .kr(8)
43065 .sr(1)
43066 .m(3)
43067 .n(4)
43068 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080043069 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043070 }
43071 }
43072
43073 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
43074 for (size_t k = 1; k < 8; k++) {
43075 GemmMicrokernelTester()
43076 .mr(3)
43077 .nr(4)
43078 .kr(8)
43079 .sr(1)
43080 .m(3)
43081 .n(4)
43082 .k(k)
43083 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080043084 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043085 }
43086 }
43087
43088 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
43089 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080043090 for (uint32_t n = 1; n <= 4; n++) {
43091 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043092 GemmMicrokernelTester()
43093 .mr(3)
43094 .nr(4)
43095 .kr(8)
43096 .sr(1)
43097 .m(m)
43098 .n(n)
43099 .k(k)
43100 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043101 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043102 }
43103 }
43104 }
43105 }
43106
43107 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
43108 for (size_t k = 9; k < 16; k++) {
43109 GemmMicrokernelTester()
43110 .mr(3)
43111 .nr(4)
43112 .kr(8)
43113 .sr(1)
43114 .m(3)
43115 .n(4)
43116 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080043117 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043118 }
43119 }
43120
43121 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
43122 for (size_t k = 9; k < 16; k++) {
43123 GemmMicrokernelTester()
43124 .mr(3)
43125 .nr(4)
43126 .kr(8)
43127 .sr(1)
43128 .m(3)
43129 .n(4)
43130 .k(k)
43131 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080043132 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043133 }
43134 }
43135
43136 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
43137 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080043138 for (uint32_t n = 1; n <= 4; n++) {
43139 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043140 GemmMicrokernelTester()
43141 .mr(3)
43142 .nr(4)
43143 .kr(8)
43144 .sr(1)
43145 .m(m)
43146 .n(n)
43147 .k(k)
43148 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043149 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043150 }
43151 }
43152 }
43153 }
43154
43155 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
43156 for (size_t k = 16; k <= 80; k += 8) {
43157 GemmMicrokernelTester()
43158 .mr(3)
43159 .nr(4)
43160 .kr(8)
43161 .sr(1)
43162 .m(3)
43163 .n(4)
43164 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080043165 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043166 }
43167 }
43168
43169 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
43170 for (size_t k = 16; k <= 80; k += 8) {
43171 GemmMicrokernelTester()
43172 .mr(3)
43173 .nr(4)
43174 .kr(8)
43175 .sr(1)
43176 .m(3)
43177 .n(4)
43178 .k(k)
43179 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080043180 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043181 }
43182 }
43183
43184 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
43185 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080043186 for (uint32_t n = 1; n <= 4; n++) {
43187 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043188 GemmMicrokernelTester()
43189 .mr(3)
43190 .nr(4)
43191 .kr(8)
43192 .sr(1)
43193 .m(m)
43194 .n(n)
43195 .k(k)
43196 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043197 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043198 }
43199 }
43200 }
43201 }
43202
43203 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
43204 for (uint32_t n = 5; n < 8; n++) {
43205 for (size_t k = 1; k <= 40; k += 9) {
43206 GemmMicrokernelTester()
43207 .mr(3)
43208 .nr(4)
43209 .kr(8)
43210 .sr(1)
43211 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080043212 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043213 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080043214 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043215 }
43216 }
43217 }
43218
43219 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
43220 for (uint32_t n = 5; n < 8; n++) {
43221 for (size_t k = 1; k <= 40; k += 9) {
43222 GemmMicrokernelTester()
43223 .mr(3)
43224 .nr(4)
43225 .kr(8)
43226 .sr(1)
43227 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080043228 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043229 .k(k)
43230 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080043231 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043232 }
43233 }
43234 }
43235
43236 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
43237 for (uint32_t n = 5; n < 8; n++) {
43238 for (size_t k = 1; k <= 40; k += 9) {
43239 GemmMicrokernelTester()
43240 .mr(3)
43241 .nr(4)
43242 .kr(8)
43243 .sr(1)
43244 .m(3)
43245 .n(n)
43246 .k(k)
43247 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080043248 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043249 }
43250 }
43251 }
43252
43253 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
43254 for (uint32_t n = 5; n < 8; n++) {
43255 for (size_t k = 1; k <= 40; k += 9) {
43256 for (uint32_t m = 1; m <= 3; m++) {
43257 GemmMicrokernelTester()
43258 .mr(3)
43259 .nr(4)
43260 .kr(8)
43261 .sr(1)
43262 .m(m)
43263 .n(n)
43264 .k(k)
43265 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043266 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043267 }
43268 }
43269 }
43270 }
43271
43272 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
43273 for (uint32_t n = 8; n <= 12; n += 4) {
43274 for (size_t k = 1; k <= 40; k += 9) {
43275 GemmMicrokernelTester()
43276 .mr(3)
43277 .nr(4)
43278 .kr(8)
43279 .sr(1)
43280 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080043281 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043282 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080043283 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043284 }
43285 }
43286 }
43287
43288 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
43289 for (uint32_t n = 8; n <= 12; n += 4) {
43290 for (size_t k = 1; k <= 40; k += 9) {
43291 GemmMicrokernelTester()
43292 .mr(3)
43293 .nr(4)
43294 .kr(8)
43295 .sr(1)
43296 .m(3)
43297 .n(n)
43298 .k(k)
43299 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080043300 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043301 }
43302 }
43303 }
43304
43305 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
43306 for (uint32_t n = 8; n <= 12; n += 4) {
43307 for (size_t k = 1; k <= 40; k += 9) {
43308 GemmMicrokernelTester()
43309 .mr(3)
43310 .nr(4)
43311 .kr(8)
43312 .sr(1)
43313 .m(3)
43314 .n(n)
43315 .k(k)
43316 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080043317 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043318 }
43319 }
43320 }
43321
43322 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
43323 for (uint32_t n = 8; n <= 12; n += 4) {
43324 for (size_t k = 1; k <= 40; k += 9) {
43325 for (uint32_t m = 1; m <= 3; m++) {
43326 GemmMicrokernelTester()
43327 .mr(3)
43328 .nr(4)
43329 .kr(8)
43330 .sr(1)
43331 .m(m)
43332 .n(n)
43333 .k(k)
43334 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043335 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043336 }
43337 }
43338 }
43339 }
43340
43341 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
43342 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080043343 for (uint32_t n = 1; n <= 4; n++) {
43344 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043345 GemmMicrokernelTester()
43346 .mr(3)
43347 .nr(4)
43348 .kr(8)
43349 .sr(1)
43350 .m(m)
43351 .n(n)
43352 .k(k)
43353 .cm_stride(7)
43354 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043355 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043356 }
43357 }
43358 }
43359 }
43360
43361 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
43362 GemmMicrokernelTester()
43363 .mr(3)
43364 .nr(4)
43365 .kr(8)
43366 .sr(1)
43367 .m(3)
43368 .n(4)
43369 .k(8)
43370 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080043371 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043372 }
43373
43374 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
43375 GemmMicrokernelTester()
43376 .mr(3)
43377 .nr(4)
43378 .kr(8)
43379 .sr(1)
43380 .m(3)
43381 .n(4)
43382 .k(8)
43383 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080043384 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043385 }
43386
43387 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
43388 GemmMicrokernelTester()
43389 .mr(3)
43390 .nr(4)
43391 .kr(8)
43392 .sr(1)
43393 .m(3)
43394 .n(4)
43395 .k(8)
43396 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080043397 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043398 }
Marat Dukhan4c617792021-12-21 15:47:58 -080043399#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043400
43401
Marat Dukhan4c617792021-12-21 15:47:58 -080043402#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043403 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2, k_eq_8) {
43404 GemmMicrokernelTester()
43405 .extended_weights(true)
43406 .mr(1)
43407 .nr(4)
43408 .kr(8)
43409 .sr(1)
43410 .m(1)
43411 .n(4)
43412 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080043413 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043414 }
43415
43416 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2, strided_cn) {
43417 GemmMicrokernelTester()
43418 .extended_weights(true)
43419 .mr(1)
43420 .nr(4)
43421 .kr(8)
43422 .sr(1)
43423 .m(1)
43424 .n(4)
43425 .k(8)
43426 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080043427 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043428 }
43429
43430 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2, k_eq_8_strided_a) {
43431 GemmMicrokernelTester()
43432 .extended_weights(true)
43433 .mr(1)
43434 .nr(4)
43435 .kr(8)
43436 .sr(1)
43437 .m(1)
43438 .n(4)
43439 .k(8)
43440 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080043441 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043442 }
43443
43444 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080043445 for (uint32_t n = 1; n <= 4; n++) {
43446 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043447 GemmMicrokernelTester()
43448 .extended_weights(true)
43449 .mr(1)
43450 .nr(4)
43451 .kr(8)
43452 .sr(1)
43453 .m(m)
43454 .n(n)
43455 .k(8)
43456 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043457 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043458 }
43459 }
43460 }
43461
43462 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2, k_eq_8_subtile_m) {
43463 for (uint32_t m = 1; m <= 1; m++) {
43464 GemmMicrokernelTester()
43465 .extended_weights(true)
43466 .mr(1)
43467 .nr(4)
43468 .kr(8)
43469 .sr(1)
43470 .m(m)
43471 .n(4)
43472 .k(8)
43473 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043474 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043475 }
43476 }
43477
43478 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2, k_eq_8_subtile_n) {
43479 for (uint32_t n = 1; n <= 4; n++) {
43480 GemmMicrokernelTester()
43481 .extended_weights(true)
43482 .mr(1)
43483 .nr(4)
43484 .kr(8)
43485 .sr(1)
43486 .m(1)
43487 .n(n)
43488 .k(8)
43489 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043490 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043491 }
43492 }
43493
43494 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2, k_lt_8) {
43495 for (size_t k = 1; k < 8; k++) {
43496 GemmMicrokernelTester()
43497 .extended_weights(true)
43498 .mr(1)
43499 .nr(4)
43500 .kr(8)
43501 .sr(1)
43502 .m(1)
43503 .n(4)
43504 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080043505 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043506 }
43507 }
43508
43509 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2, k_lt_8_strided_a) {
43510 for (size_t k = 1; k < 8; k++) {
43511 GemmMicrokernelTester()
43512 .extended_weights(true)
43513 .mr(1)
43514 .nr(4)
43515 .kr(8)
43516 .sr(1)
43517 .m(1)
43518 .n(4)
43519 .k(k)
43520 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080043521 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043522 }
43523 }
43524
43525 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2, k_lt_8_subtile) {
43526 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080043527 for (uint32_t n = 1; n <= 4; n++) {
43528 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043529 GemmMicrokernelTester()
43530 .extended_weights(true)
43531 .mr(1)
43532 .nr(4)
43533 .kr(8)
43534 .sr(1)
43535 .m(m)
43536 .n(n)
43537 .k(k)
43538 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043539 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043540 }
43541 }
43542 }
43543 }
43544
43545 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2, k_gt_8) {
43546 for (size_t k = 9; k < 16; k++) {
43547 GemmMicrokernelTester()
43548 .extended_weights(true)
43549 .mr(1)
43550 .nr(4)
43551 .kr(8)
43552 .sr(1)
43553 .m(1)
43554 .n(4)
43555 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080043556 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043557 }
43558 }
43559
43560 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2, k_gt_8_strided_a) {
43561 for (size_t k = 9; k < 16; k++) {
43562 GemmMicrokernelTester()
43563 .extended_weights(true)
43564 .mr(1)
43565 .nr(4)
43566 .kr(8)
43567 .sr(1)
43568 .m(1)
43569 .n(4)
43570 .k(k)
43571 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080043572 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043573 }
43574 }
43575
43576 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2, k_gt_8_subtile) {
43577 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080043578 for (uint32_t n = 1; n <= 4; n++) {
43579 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043580 GemmMicrokernelTester()
43581 .extended_weights(true)
43582 .mr(1)
43583 .nr(4)
43584 .kr(8)
43585 .sr(1)
43586 .m(m)
43587 .n(n)
43588 .k(k)
43589 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043590 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043591 }
43592 }
43593 }
43594 }
43595
43596 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2, k_div_8) {
43597 for (size_t k = 16; k <= 80; k += 8) {
43598 GemmMicrokernelTester()
43599 .extended_weights(true)
43600 .mr(1)
43601 .nr(4)
43602 .kr(8)
43603 .sr(1)
43604 .m(1)
43605 .n(4)
43606 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080043607 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043608 }
43609 }
43610
43611 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2, k_div_8_strided_a) {
43612 for (size_t k = 16; k <= 80; k += 8) {
43613 GemmMicrokernelTester()
43614 .extended_weights(true)
43615 .mr(1)
43616 .nr(4)
43617 .kr(8)
43618 .sr(1)
43619 .m(1)
43620 .n(4)
43621 .k(k)
43622 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080043623 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043624 }
43625 }
43626
43627 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2, k_div_8_subtile) {
43628 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080043629 for (uint32_t n = 1; n <= 4; n++) {
43630 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043631 GemmMicrokernelTester()
43632 .extended_weights(true)
43633 .mr(1)
43634 .nr(4)
43635 .kr(8)
43636 .sr(1)
43637 .m(m)
43638 .n(n)
43639 .k(k)
43640 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043641 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043642 }
43643 }
43644 }
43645 }
43646
43647 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2, n_gt_4) {
43648 for (uint32_t n = 5; n < 8; n++) {
43649 for (size_t k = 1; k <= 40; k += 9) {
43650 GemmMicrokernelTester()
43651 .extended_weights(true)
43652 .mr(1)
43653 .nr(4)
43654 .kr(8)
43655 .sr(1)
43656 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080043657 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043658 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080043659 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043660 }
43661 }
43662 }
43663
43664 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2, n_gt_4_strided_cn) {
43665 for (uint32_t n = 5; n < 8; n++) {
43666 for (size_t k = 1; k <= 40; k += 9) {
43667 GemmMicrokernelTester()
43668 .extended_weights(true)
43669 .mr(1)
43670 .nr(4)
43671 .kr(8)
43672 .sr(1)
43673 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080043674 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043675 .k(k)
43676 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080043677 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043678 }
43679 }
43680 }
43681
43682 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2, n_gt_4_strided_a) {
43683 for (uint32_t n = 5; n < 8; n++) {
43684 for (size_t k = 1; k <= 40; k += 9) {
43685 GemmMicrokernelTester()
43686 .extended_weights(true)
43687 .mr(1)
43688 .nr(4)
43689 .kr(8)
43690 .sr(1)
43691 .m(1)
43692 .n(n)
43693 .k(k)
43694 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080043695 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043696 }
43697 }
43698 }
43699
43700 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2, n_gt_4_subtile) {
43701 for (uint32_t n = 5; n < 8; n++) {
43702 for (size_t k = 1; k <= 40; k += 9) {
43703 for (uint32_t m = 1; m <= 1; m++) {
43704 GemmMicrokernelTester()
43705 .extended_weights(true)
43706 .mr(1)
43707 .nr(4)
43708 .kr(8)
43709 .sr(1)
43710 .m(m)
43711 .n(n)
43712 .k(k)
43713 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043714 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043715 }
43716 }
43717 }
43718 }
43719
43720 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2, n_div_4) {
43721 for (uint32_t n = 8; n <= 12; n += 4) {
43722 for (size_t k = 1; k <= 40; k += 9) {
43723 GemmMicrokernelTester()
43724 .extended_weights(true)
43725 .mr(1)
43726 .nr(4)
43727 .kr(8)
43728 .sr(1)
43729 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080043730 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043731 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080043732 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043733 }
43734 }
43735 }
43736
43737 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2, n_div_4_strided_cn) {
43738 for (uint32_t n = 8; n <= 12; n += 4) {
43739 for (size_t k = 1; k <= 40; k += 9) {
43740 GemmMicrokernelTester()
43741 .extended_weights(true)
43742 .mr(1)
43743 .nr(4)
43744 .kr(8)
43745 .sr(1)
43746 .m(1)
43747 .n(n)
43748 .k(k)
43749 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080043750 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043751 }
43752 }
43753 }
43754
43755 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2, n_div_4_strided_a) {
43756 for (uint32_t n = 8; n <= 12; n += 4) {
43757 for (size_t k = 1; k <= 40; k += 9) {
43758 GemmMicrokernelTester()
43759 .extended_weights(true)
43760 .mr(1)
43761 .nr(4)
43762 .kr(8)
43763 .sr(1)
43764 .m(1)
43765 .n(n)
43766 .k(k)
43767 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080043768 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043769 }
43770 }
43771 }
43772
43773 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2, n_div_4_subtile) {
43774 for (uint32_t n = 8; n <= 12; n += 4) {
43775 for (size_t k = 1; k <= 40; k += 9) {
43776 for (uint32_t m = 1; m <= 1; m++) {
43777 GemmMicrokernelTester()
43778 .extended_weights(true)
43779 .mr(1)
43780 .nr(4)
43781 .kr(8)
43782 .sr(1)
43783 .m(m)
43784 .n(n)
43785 .k(k)
43786 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043787 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043788 }
43789 }
43790 }
43791 }
43792
43793 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2, strided_cm_subtile) {
43794 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080043795 for (uint32_t n = 1; n <= 4; n++) {
43796 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043797 GemmMicrokernelTester()
43798 .extended_weights(true)
43799 .mr(1)
43800 .nr(4)
43801 .kr(8)
43802 .sr(1)
43803 .m(m)
43804 .n(n)
43805 .k(k)
43806 .cm_stride(7)
43807 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043808 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043809 }
43810 }
43811 }
43812 }
43813
43814 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2, strided_cm) {
43815 GemmMicrokernelTester()
43816 .extended_weights(true)
43817 .mr(1)
43818 .nr(4)
43819 .kr(8)
43820 .sr(1)
43821 .m(1)
43822 .n(4)
43823 .k(8)
43824 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080043825 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043826 }
Marat Dukhan4c617792021-12-21 15:47:58 -080043827#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070043828
43829
Marat Dukhan4c617792021-12-21 15:47:58 -080043830#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhandfc2db02021-08-08 21:19:07 -070043831 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD64, k_eq_8) {
Marat Dukhan4741e412021-06-30 13:38:06 -070043832 GemmMicrokernelTester()
43833 .mr(3)
43834 .nr(4)
43835 .kr(8)
43836 .sr(1)
43837 .m(3)
43838 .n(4)
43839 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080043840 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070043841 }
43842
Marat Dukhandfc2db02021-08-08 21:19:07 -070043843 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD64, strided_cn) {
Marat Dukhan4741e412021-06-30 13:38:06 -070043844 GemmMicrokernelTester()
43845 .mr(3)
43846 .nr(4)
43847 .kr(8)
43848 .sr(1)
43849 .m(3)
43850 .n(4)
43851 .k(8)
43852 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080043853 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070043854 }
43855
Marat Dukhandfc2db02021-08-08 21:19:07 -070043856 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD64, k_eq_8_strided_a) {
Marat Dukhan4741e412021-06-30 13:38:06 -070043857 GemmMicrokernelTester()
43858 .mr(3)
43859 .nr(4)
43860 .kr(8)
43861 .sr(1)
43862 .m(3)
43863 .n(4)
43864 .k(8)
43865 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080043866 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070043867 }
43868
Marat Dukhandfc2db02021-08-08 21:19:07 -070043869 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD64, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080043870 for (uint32_t n = 1; n <= 4; n++) {
43871 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070043872 GemmMicrokernelTester()
43873 .mr(3)
43874 .nr(4)
43875 .kr(8)
43876 .sr(1)
43877 .m(m)
43878 .n(n)
43879 .k(8)
43880 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043881 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070043882 }
43883 }
43884 }
43885
Marat Dukhandfc2db02021-08-08 21:19:07 -070043886 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD64, k_eq_8_subtile_m) {
Marat Dukhan4741e412021-06-30 13:38:06 -070043887 for (uint32_t m = 1; m <= 3; m++) {
43888 GemmMicrokernelTester()
43889 .mr(3)
43890 .nr(4)
43891 .kr(8)
43892 .sr(1)
43893 .m(m)
43894 .n(4)
43895 .k(8)
43896 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043897 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070043898 }
43899 }
43900
Marat Dukhandfc2db02021-08-08 21:19:07 -070043901 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD64, k_eq_8_subtile_n) {
Marat Dukhan4741e412021-06-30 13:38:06 -070043902 for (uint32_t n = 1; n <= 4; n++) {
43903 GemmMicrokernelTester()
43904 .mr(3)
43905 .nr(4)
43906 .kr(8)
43907 .sr(1)
43908 .m(3)
43909 .n(n)
43910 .k(8)
43911 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043912 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070043913 }
43914 }
43915
Marat Dukhandfc2db02021-08-08 21:19:07 -070043916 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD64, k_lt_8) {
Marat Dukhan4741e412021-06-30 13:38:06 -070043917 for (size_t k = 1; k < 8; k++) {
43918 GemmMicrokernelTester()
43919 .mr(3)
43920 .nr(4)
43921 .kr(8)
43922 .sr(1)
43923 .m(3)
43924 .n(4)
43925 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080043926 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070043927 }
43928 }
43929
Marat Dukhandfc2db02021-08-08 21:19:07 -070043930 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD64, k_lt_8_strided_a) {
Marat Dukhan4741e412021-06-30 13:38:06 -070043931 for (size_t k = 1; k < 8; k++) {
43932 GemmMicrokernelTester()
43933 .mr(3)
43934 .nr(4)
43935 .kr(8)
43936 .sr(1)
43937 .m(3)
43938 .n(4)
43939 .k(k)
43940 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080043941 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070043942 }
43943 }
43944
Marat Dukhandfc2db02021-08-08 21:19:07 -070043945 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD64, k_lt_8_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070043946 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080043947 for (uint32_t n = 1; n <= 4; n++) {
43948 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070043949 GemmMicrokernelTester()
43950 .mr(3)
43951 .nr(4)
43952 .kr(8)
43953 .sr(1)
43954 .m(m)
43955 .n(n)
43956 .k(k)
43957 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080043958 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070043959 }
43960 }
43961 }
43962 }
43963
Marat Dukhandfc2db02021-08-08 21:19:07 -070043964 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD64, k_gt_8) {
Marat Dukhan4741e412021-06-30 13:38:06 -070043965 for (size_t k = 9; k < 16; k++) {
43966 GemmMicrokernelTester()
43967 .mr(3)
43968 .nr(4)
43969 .kr(8)
43970 .sr(1)
43971 .m(3)
43972 .n(4)
43973 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080043974 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070043975 }
43976 }
43977
Marat Dukhandfc2db02021-08-08 21:19:07 -070043978 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD64, k_gt_8_strided_a) {
Marat Dukhan4741e412021-06-30 13:38:06 -070043979 for (size_t k = 9; k < 16; k++) {
43980 GemmMicrokernelTester()
43981 .mr(3)
43982 .nr(4)
43983 .kr(8)
43984 .sr(1)
43985 .m(3)
43986 .n(4)
43987 .k(k)
43988 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080043989 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070043990 }
43991 }
43992
Marat Dukhandfc2db02021-08-08 21:19:07 -070043993 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD64, k_gt_8_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070043994 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080043995 for (uint32_t n = 1; n <= 4; n++) {
43996 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070043997 GemmMicrokernelTester()
43998 .mr(3)
43999 .nr(4)
44000 .kr(8)
44001 .sr(1)
44002 .m(m)
44003 .n(n)
44004 .k(k)
44005 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044006 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044007 }
44008 }
44009 }
44010 }
44011
Marat Dukhandfc2db02021-08-08 21:19:07 -070044012 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD64, k_div_8) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044013 for (size_t k = 16; k <= 80; k += 8) {
44014 GemmMicrokernelTester()
44015 .mr(3)
44016 .nr(4)
44017 .kr(8)
44018 .sr(1)
44019 .m(3)
44020 .n(4)
44021 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080044022 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044023 }
44024 }
44025
Marat Dukhandfc2db02021-08-08 21:19:07 -070044026 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD64, k_div_8_strided_a) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044027 for (size_t k = 16; k <= 80; k += 8) {
44028 GemmMicrokernelTester()
44029 .mr(3)
44030 .nr(4)
44031 .kr(8)
44032 .sr(1)
44033 .m(3)
44034 .n(4)
44035 .k(k)
44036 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080044037 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044038 }
44039 }
44040
Marat Dukhandfc2db02021-08-08 21:19:07 -070044041 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD64, k_div_8_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044042 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080044043 for (uint32_t n = 1; n <= 4; n++) {
44044 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044045 GemmMicrokernelTester()
44046 .mr(3)
44047 .nr(4)
44048 .kr(8)
44049 .sr(1)
44050 .m(m)
44051 .n(n)
44052 .k(k)
44053 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044054 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044055 }
44056 }
44057 }
44058 }
44059
Marat Dukhandfc2db02021-08-08 21:19:07 -070044060 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD64, n_gt_4) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044061 for (uint32_t n = 5; n < 8; n++) {
44062 for (size_t k = 1; k <= 40; k += 9) {
44063 GemmMicrokernelTester()
44064 .mr(3)
44065 .nr(4)
44066 .kr(8)
44067 .sr(1)
44068 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080044069 .n(n)
Marat Dukhan4741e412021-06-30 13:38:06 -070044070 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080044071 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044072 }
44073 }
44074 }
44075
Marat Dukhandfc2db02021-08-08 21:19:07 -070044076 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD64, n_gt_4_strided_cn) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044077 for (uint32_t n = 5; n < 8; n++) {
44078 for (size_t k = 1; k <= 40; k += 9) {
44079 GemmMicrokernelTester()
44080 .mr(3)
44081 .nr(4)
44082 .kr(8)
44083 .sr(1)
44084 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080044085 .n(n)
Marat Dukhan4741e412021-06-30 13:38:06 -070044086 .k(k)
44087 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080044088 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044089 }
44090 }
44091 }
44092
Marat Dukhandfc2db02021-08-08 21:19:07 -070044093 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD64, n_gt_4_strided_a) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044094 for (uint32_t n = 5; n < 8; n++) {
44095 for (size_t k = 1; k <= 40; k += 9) {
44096 GemmMicrokernelTester()
44097 .mr(3)
44098 .nr(4)
44099 .kr(8)
44100 .sr(1)
44101 .m(3)
44102 .n(n)
44103 .k(k)
44104 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080044105 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044106 }
44107 }
44108 }
44109
Marat Dukhandfc2db02021-08-08 21:19:07 -070044110 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD64, n_gt_4_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044111 for (uint32_t n = 5; n < 8; n++) {
44112 for (size_t k = 1; k <= 40; k += 9) {
44113 for (uint32_t m = 1; m <= 3; m++) {
44114 GemmMicrokernelTester()
44115 .mr(3)
44116 .nr(4)
44117 .kr(8)
44118 .sr(1)
44119 .m(m)
44120 .n(n)
44121 .k(k)
44122 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044123 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044124 }
44125 }
44126 }
44127 }
44128
Marat Dukhandfc2db02021-08-08 21:19:07 -070044129 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD64, n_div_4) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044130 for (uint32_t n = 8; n <= 12; n += 4) {
44131 for (size_t k = 1; k <= 40; k += 9) {
44132 GemmMicrokernelTester()
44133 .mr(3)
44134 .nr(4)
44135 .kr(8)
44136 .sr(1)
44137 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080044138 .n(n)
Marat Dukhan4741e412021-06-30 13:38:06 -070044139 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080044140 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044141 }
44142 }
44143 }
44144
Marat Dukhandfc2db02021-08-08 21:19:07 -070044145 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD64, n_div_4_strided_cn) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044146 for (uint32_t n = 8; n <= 12; n += 4) {
44147 for (size_t k = 1; k <= 40; k += 9) {
44148 GemmMicrokernelTester()
44149 .mr(3)
44150 .nr(4)
44151 .kr(8)
44152 .sr(1)
44153 .m(3)
44154 .n(n)
44155 .k(k)
44156 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080044157 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044158 }
44159 }
44160 }
44161
Marat Dukhandfc2db02021-08-08 21:19:07 -070044162 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD64, n_div_4_strided_a) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044163 for (uint32_t n = 8; n <= 12; n += 4) {
44164 for (size_t k = 1; k <= 40; k += 9) {
44165 GemmMicrokernelTester()
44166 .mr(3)
44167 .nr(4)
44168 .kr(8)
44169 .sr(1)
44170 .m(3)
44171 .n(n)
44172 .k(k)
44173 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080044174 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044175 }
44176 }
44177 }
44178
Marat Dukhandfc2db02021-08-08 21:19:07 -070044179 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD64, n_div_4_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044180 for (uint32_t n = 8; n <= 12; n += 4) {
44181 for (size_t k = 1; k <= 40; k += 9) {
44182 for (uint32_t m = 1; m <= 3; m++) {
44183 GemmMicrokernelTester()
44184 .mr(3)
44185 .nr(4)
44186 .kr(8)
44187 .sr(1)
44188 .m(m)
44189 .n(n)
44190 .k(k)
44191 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044192 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044193 }
44194 }
44195 }
44196 }
44197
Marat Dukhandfc2db02021-08-08 21:19:07 -070044198 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD64, strided_cm_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044199 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080044200 for (uint32_t n = 1; n <= 4; n++) {
44201 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044202 GemmMicrokernelTester()
44203 .mr(3)
44204 .nr(4)
44205 .kr(8)
44206 .sr(1)
44207 .m(m)
44208 .n(n)
44209 .k(k)
44210 .cm_stride(7)
44211 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044212 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044213 }
44214 }
44215 }
44216 }
44217
Marat Dukhandfc2db02021-08-08 21:19:07 -070044218 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD64, qmin) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044219 GemmMicrokernelTester()
44220 .mr(3)
44221 .nr(4)
44222 .kr(8)
44223 .sr(1)
44224 .m(3)
44225 .n(4)
44226 .k(8)
44227 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080044228 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044229 }
44230
Marat Dukhandfc2db02021-08-08 21:19:07 -070044231 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD64, qmax) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044232 GemmMicrokernelTester()
44233 .mr(3)
44234 .nr(4)
44235 .kr(8)
44236 .sr(1)
44237 .m(3)
44238 .n(4)
44239 .k(8)
44240 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080044241 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044242 }
44243
Marat Dukhandfc2db02021-08-08 21:19:07 -070044244 TEST(QS8_GEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL16_LD64, strided_cm) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044245 GemmMicrokernelTester()
44246 .mr(3)
44247 .nr(4)
44248 .kr(8)
44249 .sr(1)
44250 .m(3)
44251 .n(4)
44252 .k(8)
44253 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080044254 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044255 }
Marat Dukhan4c617792021-12-21 15:47:58 -080044256#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan4741e412021-06-30 13:38:06 -070044257
44258
Marat Dukhan4c617792021-12-21 15:47:58 -080044259#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhandfc2db02021-08-08 21:19:07 -070044260 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD128, k_eq_8) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044261 GemmMicrokernelTester()
44262 .mr(2)
44263 .nr(4)
44264 .kr(8)
44265 .sr(1)
44266 .m(2)
44267 .n(4)
44268 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080044269 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044270 }
44271
Marat Dukhandfc2db02021-08-08 21:19:07 -070044272 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD128, strided_cn) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044273 GemmMicrokernelTester()
44274 .mr(2)
44275 .nr(4)
44276 .kr(8)
44277 .sr(1)
44278 .m(2)
44279 .n(4)
44280 .k(8)
44281 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080044282 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044283 }
44284
Marat Dukhandfc2db02021-08-08 21:19:07 -070044285 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD128, k_eq_8_strided_a) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044286 GemmMicrokernelTester()
44287 .mr(2)
44288 .nr(4)
44289 .kr(8)
44290 .sr(1)
44291 .m(2)
44292 .n(4)
44293 .k(8)
44294 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080044295 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044296 }
44297
Marat Dukhandfc2db02021-08-08 21:19:07 -070044298 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD128, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080044299 for (uint32_t n = 1; n <= 4; n++) {
44300 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044301 GemmMicrokernelTester()
44302 .mr(2)
44303 .nr(4)
44304 .kr(8)
44305 .sr(1)
44306 .m(m)
44307 .n(n)
44308 .k(8)
44309 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044310 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044311 }
44312 }
44313 }
44314
Marat Dukhandfc2db02021-08-08 21:19:07 -070044315 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD128, k_eq_8_subtile_m) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044316 for (uint32_t m = 1; m <= 2; m++) {
44317 GemmMicrokernelTester()
44318 .mr(2)
44319 .nr(4)
44320 .kr(8)
44321 .sr(1)
44322 .m(m)
44323 .n(4)
44324 .k(8)
44325 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044326 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044327 }
44328 }
44329
Marat Dukhandfc2db02021-08-08 21:19:07 -070044330 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD128, k_eq_8_subtile_n) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044331 for (uint32_t n = 1; n <= 4; n++) {
44332 GemmMicrokernelTester()
44333 .mr(2)
44334 .nr(4)
44335 .kr(8)
44336 .sr(1)
44337 .m(2)
44338 .n(n)
44339 .k(8)
44340 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044341 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044342 }
44343 }
44344
Marat Dukhandfc2db02021-08-08 21:19:07 -070044345 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD128, k_lt_8) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044346 for (size_t k = 1; k < 8; k++) {
44347 GemmMicrokernelTester()
44348 .mr(2)
44349 .nr(4)
44350 .kr(8)
44351 .sr(1)
44352 .m(2)
44353 .n(4)
44354 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080044355 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044356 }
44357 }
44358
Marat Dukhandfc2db02021-08-08 21:19:07 -070044359 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD128, k_lt_8_strided_a) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044360 for (size_t k = 1; k < 8; k++) {
44361 GemmMicrokernelTester()
44362 .mr(2)
44363 .nr(4)
44364 .kr(8)
44365 .sr(1)
44366 .m(2)
44367 .n(4)
44368 .k(k)
44369 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080044370 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044371 }
44372 }
44373
Marat Dukhandfc2db02021-08-08 21:19:07 -070044374 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD128, k_lt_8_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044375 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080044376 for (uint32_t n = 1; n <= 4; n++) {
44377 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044378 GemmMicrokernelTester()
44379 .mr(2)
44380 .nr(4)
44381 .kr(8)
44382 .sr(1)
44383 .m(m)
44384 .n(n)
44385 .k(k)
44386 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044387 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044388 }
44389 }
44390 }
44391 }
44392
Marat Dukhandfc2db02021-08-08 21:19:07 -070044393 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD128, k_gt_8) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044394 for (size_t k = 9; k < 16; k++) {
44395 GemmMicrokernelTester()
44396 .mr(2)
44397 .nr(4)
44398 .kr(8)
44399 .sr(1)
44400 .m(2)
44401 .n(4)
44402 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080044403 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044404 }
44405 }
44406
Marat Dukhandfc2db02021-08-08 21:19:07 -070044407 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD128, k_gt_8_strided_a) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044408 for (size_t k = 9; k < 16; k++) {
44409 GemmMicrokernelTester()
44410 .mr(2)
44411 .nr(4)
44412 .kr(8)
44413 .sr(1)
44414 .m(2)
44415 .n(4)
44416 .k(k)
44417 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080044418 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044419 }
44420 }
44421
Marat Dukhandfc2db02021-08-08 21:19:07 -070044422 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD128, k_gt_8_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044423 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080044424 for (uint32_t n = 1; n <= 4; n++) {
44425 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044426 GemmMicrokernelTester()
44427 .mr(2)
44428 .nr(4)
44429 .kr(8)
44430 .sr(1)
44431 .m(m)
44432 .n(n)
44433 .k(k)
44434 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044435 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044436 }
44437 }
44438 }
44439 }
44440
Marat Dukhandfc2db02021-08-08 21:19:07 -070044441 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD128, k_div_8) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044442 for (size_t k = 16; k <= 80; k += 8) {
44443 GemmMicrokernelTester()
44444 .mr(2)
44445 .nr(4)
44446 .kr(8)
44447 .sr(1)
44448 .m(2)
44449 .n(4)
44450 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080044451 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044452 }
44453 }
44454
Marat Dukhandfc2db02021-08-08 21:19:07 -070044455 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD128, k_div_8_strided_a) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044456 for (size_t k = 16; k <= 80; k += 8) {
44457 GemmMicrokernelTester()
44458 .mr(2)
44459 .nr(4)
44460 .kr(8)
44461 .sr(1)
44462 .m(2)
44463 .n(4)
44464 .k(k)
44465 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080044466 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044467 }
44468 }
44469
Marat Dukhandfc2db02021-08-08 21:19:07 -070044470 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD128, k_div_8_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044471 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080044472 for (uint32_t n = 1; n <= 4; n++) {
44473 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044474 GemmMicrokernelTester()
44475 .mr(2)
44476 .nr(4)
44477 .kr(8)
44478 .sr(1)
44479 .m(m)
44480 .n(n)
44481 .k(k)
44482 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044483 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044484 }
44485 }
44486 }
44487 }
44488
Marat Dukhandfc2db02021-08-08 21:19:07 -070044489 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD128, n_gt_4) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044490 for (uint32_t n = 5; n < 8; n++) {
44491 for (size_t k = 1; k <= 40; k += 9) {
44492 GemmMicrokernelTester()
44493 .mr(2)
44494 .nr(4)
44495 .kr(8)
44496 .sr(1)
44497 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080044498 .n(n)
Marat Dukhan4741e412021-06-30 13:38:06 -070044499 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080044500 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044501 }
44502 }
44503 }
44504
Marat Dukhandfc2db02021-08-08 21:19:07 -070044505 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD128, n_gt_4_strided_cn) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044506 for (uint32_t n = 5; n < 8; n++) {
44507 for (size_t k = 1; k <= 40; k += 9) {
44508 GemmMicrokernelTester()
44509 .mr(2)
44510 .nr(4)
44511 .kr(8)
44512 .sr(1)
44513 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080044514 .n(n)
Marat Dukhan4741e412021-06-30 13:38:06 -070044515 .k(k)
44516 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080044517 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044518 }
44519 }
44520 }
44521
Marat Dukhandfc2db02021-08-08 21:19:07 -070044522 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD128, n_gt_4_strided_a) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044523 for (uint32_t n = 5; n < 8; n++) {
44524 for (size_t k = 1; k <= 40; k += 9) {
44525 GemmMicrokernelTester()
44526 .mr(2)
44527 .nr(4)
44528 .kr(8)
44529 .sr(1)
44530 .m(2)
44531 .n(n)
44532 .k(k)
44533 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080044534 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044535 }
44536 }
44537 }
44538
Marat Dukhandfc2db02021-08-08 21:19:07 -070044539 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD128, n_gt_4_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044540 for (uint32_t n = 5; n < 8; n++) {
44541 for (size_t k = 1; k <= 40; k += 9) {
44542 for (uint32_t m = 1; m <= 2; m++) {
44543 GemmMicrokernelTester()
44544 .mr(2)
44545 .nr(4)
44546 .kr(8)
44547 .sr(1)
44548 .m(m)
44549 .n(n)
44550 .k(k)
44551 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044552 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044553 }
44554 }
44555 }
44556 }
44557
Marat Dukhandfc2db02021-08-08 21:19:07 -070044558 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD128, n_div_4) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044559 for (uint32_t n = 8; n <= 12; n += 4) {
44560 for (size_t k = 1; k <= 40; k += 9) {
44561 GemmMicrokernelTester()
44562 .mr(2)
44563 .nr(4)
44564 .kr(8)
44565 .sr(1)
44566 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080044567 .n(n)
Marat Dukhan4741e412021-06-30 13:38:06 -070044568 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080044569 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044570 }
44571 }
44572 }
44573
Marat Dukhandfc2db02021-08-08 21:19:07 -070044574 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD128, n_div_4_strided_cn) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044575 for (uint32_t n = 8; n <= 12; n += 4) {
44576 for (size_t k = 1; k <= 40; k += 9) {
44577 GemmMicrokernelTester()
44578 .mr(2)
44579 .nr(4)
44580 .kr(8)
44581 .sr(1)
44582 .m(2)
44583 .n(n)
44584 .k(k)
44585 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080044586 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044587 }
44588 }
44589 }
44590
Marat Dukhandfc2db02021-08-08 21:19:07 -070044591 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD128, n_div_4_strided_a) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044592 for (uint32_t n = 8; n <= 12; n += 4) {
44593 for (size_t k = 1; k <= 40; k += 9) {
44594 GemmMicrokernelTester()
44595 .mr(2)
44596 .nr(4)
44597 .kr(8)
44598 .sr(1)
44599 .m(2)
44600 .n(n)
44601 .k(k)
44602 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080044603 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044604 }
44605 }
44606 }
44607
Marat Dukhandfc2db02021-08-08 21:19:07 -070044608 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD128, n_div_4_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044609 for (uint32_t n = 8; n <= 12; n += 4) {
44610 for (size_t k = 1; k <= 40; k += 9) {
44611 for (uint32_t m = 1; m <= 2; m++) {
44612 GemmMicrokernelTester()
44613 .mr(2)
44614 .nr(4)
44615 .kr(8)
44616 .sr(1)
44617 .m(m)
44618 .n(n)
44619 .k(k)
44620 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044621 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044622 }
44623 }
44624 }
44625 }
44626
Marat Dukhandfc2db02021-08-08 21:19:07 -070044627 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD128, strided_cm_subtile) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044628 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080044629 for (uint32_t n = 1; n <= 4; n++) {
44630 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044631 GemmMicrokernelTester()
44632 .mr(2)
44633 .nr(4)
44634 .kr(8)
44635 .sr(1)
44636 .m(m)
44637 .n(n)
44638 .k(k)
44639 .cm_stride(7)
44640 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044641 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044642 }
44643 }
44644 }
44645 }
44646
Marat Dukhandfc2db02021-08-08 21:19:07 -070044647 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD128, qmin) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044648 GemmMicrokernelTester()
44649 .mr(2)
44650 .nr(4)
44651 .kr(8)
44652 .sr(1)
44653 .m(2)
44654 .n(4)
44655 .k(8)
44656 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080044657 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044658 }
44659
Marat Dukhandfc2db02021-08-08 21:19:07 -070044660 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD128, qmax) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044661 GemmMicrokernelTester()
44662 .mr(2)
44663 .nr(4)
44664 .kr(8)
44665 .sr(1)
44666 .m(2)
44667 .n(4)
44668 .k(8)
44669 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080044670 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044671 }
44672
Marat Dukhandfc2db02021-08-08 21:19:07 -070044673 TEST(QS8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_MUL16_LD128, strided_cm) {
Marat Dukhan4741e412021-06-30 13:38:06 -070044674 GemmMicrokernelTester()
44675 .mr(2)
44676 .nr(4)
44677 .kr(8)
44678 .sr(1)
44679 .m(2)
44680 .n(4)
44681 .k(8)
44682 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080044683 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan4741e412021-06-30 13:38:06 -070044684 }
Marat Dukhan4c617792021-12-21 15:47:58 -080044685#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan4741e412021-06-30 13:38:06 -070044686
44687
Marat Dukhan4c617792021-12-21 15:47:58 -080044688#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhandfc2db02021-08-08 21:19:07 -070044689 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_MUL16, k_eq_8) {
Marat Dukhan86746292021-08-06 17:27:18 -070044690 GemmMicrokernelTester()
44691 .extended_weights(true)
44692 .mr(1)
44693 .nr(4)
44694 .kr(8)
44695 .sr(1)
44696 .m(1)
44697 .n(4)
44698 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080044699 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan86746292021-08-06 17:27:18 -070044700 }
44701
Marat Dukhandfc2db02021-08-08 21:19:07 -070044702 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_MUL16, strided_cn) {
Marat Dukhan86746292021-08-06 17:27:18 -070044703 GemmMicrokernelTester()
44704 .extended_weights(true)
44705 .mr(1)
44706 .nr(4)
44707 .kr(8)
44708 .sr(1)
44709 .m(1)
44710 .n(4)
44711 .k(8)
44712 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080044713 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan86746292021-08-06 17:27:18 -070044714 }
44715
Marat Dukhandfc2db02021-08-08 21:19:07 -070044716 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_MUL16, k_eq_8_strided_a) {
Marat Dukhan86746292021-08-06 17:27:18 -070044717 GemmMicrokernelTester()
44718 .extended_weights(true)
44719 .mr(1)
44720 .nr(4)
44721 .kr(8)
44722 .sr(1)
44723 .m(1)
44724 .n(4)
44725 .k(8)
44726 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080044727 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan86746292021-08-06 17:27:18 -070044728 }
44729
Marat Dukhandfc2db02021-08-08 21:19:07 -070044730 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_MUL16, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080044731 for (uint32_t n = 1; n <= 4; n++) {
44732 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan86746292021-08-06 17:27:18 -070044733 GemmMicrokernelTester()
44734 .extended_weights(true)
44735 .mr(1)
44736 .nr(4)
44737 .kr(8)
44738 .sr(1)
44739 .m(m)
44740 .n(n)
44741 .k(8)
44742 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044743 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan86746292021-08-06 17:27:18 -070044744 }
44745 }
44746 }
44747
Marat Dukhandfc2db02021-08-08 21:19:07 -070044748 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_MUL16, k_eq_8_subtile_m) {
Marat Dukhan86746292021-08-06 17:27:18 -070044749 for (uint32_t m = 1; m <= 1; m++) {
44750 GemmMicrokernelTester()
44751 .extended_weights(true)
44752 .mr(1)
44753 .nr(4)
44754 .kr(8)
44755 .sr(1)
44756 .m(m)
44757 .n(4)
44758 .k(8)
44759 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044760 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan86746292021-08-06 17:27:18 -070044761 }
44762 }
44763
Marat Dukhandfc2db02021-08-08 21:19:07 -070044764 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_MUL16, k_eq_8_subtile_n) {
Marat Dukhan86746292021-08-06 17:27:18 -070044765 for (uint32_t n = 1; n <= 4; n++) {
44766 GemmMicrokernelTester()
44767 .extended_weights(true)
44768 .mr(1)
44769 .nr(4)
44770 .kr(8)
44771 .sr(1)
44772 .m(1)
44773 .n(n)
44774 .k(8)
44775 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044776 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan86746292021-08-06 17:27:18 -070044777 }
44778 }
44779
Marat Dukhandfc2db02021-08-08 21:19:07 -070044780 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_MUL16, k_lt_8) {
Marat Dukhan86746292021-08-06 17:27:18 -070044781 for (size_t k = 1; k < 8; k++) {
44782 GemmMicrokernelTester()
44783 .extended_weights(true)
44784 .mr(1)
44785 .nr(4)
44786 .kr(8)
44787 .sr(1)
44788 .m(1)
44789 .n(4)
44790 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080044791 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan86746292021-08-06 17:27:18 -070044792 }
44793 }
44794
Marat Dukhandfc2db02021-08-08 21:19:07 -070044795 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_MUL16, k_lt_8_strided_a) {
Marat Dukhan86746292021-08-06 17:27:18 -070044796 for (size_t k = 1; k < 8; k++) {
44797 GemmMicrokernelTester()
44798 .extended_weights(true)
44799 .mr(1)
44800 .nr(4)
44801 .kr(8)
44802 .sr(1)
44803 .m(1)
44804 .n(4)
44805 .k(k)
44806 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080044807 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan86746292021-08-06 17:27:18 -070044808 }
44809 }
44810
Marat Dukhandfc2db02021-08-08 21:19:07 -070044811 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_MUL16, k_lt_8_subtile) {
Marat Dukhan86746292021-08-06 17:27:18 -070044812 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080044813 for (uint32_t n = 1; n <= 4; n++) {
44814 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan86746292021-08-06 17:27:18 -070044815 GemmMicrokernelTester()
44816 .extended_weights(true)
44817 .mr(1)
44818 .nr(4)
44819 .kr(8)
44820 .sr(1)
44821 .m(m)
44822 .n(n)
44823 .k(k)
44824 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044825 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan86746292021-08-06 17:27:18 -070044826 }
44827 }
44828 }
44829 }
44830
Marat Dukhandfc2db02021-08-08 21:19:07 -070044831 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_MUL16, k_gt_8) {
Marat Dukhan86746292021-08-06 17:27:18 -070044832 for (size_t k = 9; k < 16; k++) {
44833 GemmMicrokernelTester()
44834 .extended_weights(true)
44835 .mr(1)
44836 .nr(4)
44837 .kr(8)
44838 .sr(1)
44839 .m(1)
44840 .n(4)
44841 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080044842 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan86746292021-08-06 17:27:18 -070044843 }
44844 }
44845
Marat Dukhandfc2db02021-08-08 21:19:07 -070044846 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_MUL16, k_gt_8_strided_a) {
Marat Dukhan86746292021-08-06 17:27:18 -070044847 for (size_t k = 9; k < 16; k++) {
44848 GemmMicrokernelTester()
44849 .extended_weights(true)
44850 .mr(1)
44851 .nr(4)
44852 .kr(8)
44853 .sr(1)
44854 .m(1)
44855 .n(4)
44856 .k(k)
44857 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080044858 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan86746292021-08-06 17:27:18 -070044859 }
44860 }
44861
Marat Dukhandfc2db02021-08-08 21:19:07 -070044862 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_MUL16, k_gt_8_subtile) {
Marat Dukhan86746292021-08-06 17:27:18 -070044863 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080044864 for (uint32_t n = 1; n <= 4; n++) {
44865 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan86746292021-08-06 17:27:18 -070044866 GemmMicrokernelTester()
44867 .extended_weights(true)
44868 .mr(1)
44869 .nr(4)
44870 .kr(8)
44871 .sr(1)
44872 .m(m)
44873 .n(n)
44874 .k(k)
44875 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044876 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan86746292021-08-06 17:27:18 -070044877 }
44878 }
44879 }
44880 }
44881
Marat Dukhandfc2db02021-08-08 21:19:07 -070044882 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_MUL16, k_div_8) {
Marat Dukhan86746292021-08-06 17:27:18 -070044883 for (size_t k = 16; k <= 80; k += 8) {
44884 GemmMicrokernelTester()
44885 .extended_weights(true)
44886 .mr(1)
44887 .nr(4)
44888 .kr(8)
44889 .sr(1)
44890 .m(1)
44891 .n(4)
44892 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080044893 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan86746292021-08-06 17:27:18 -070044894 }
44895 }
44896
Marat Dukhandfc2db02021-08-08 21:19:07 -070044897 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_MUL16, k_div_8_strided_a) {
Marat Dukhan86746292021-08-06 17:27:18 -070044898 for (size_t k = 16; k <= 80; k += 8) {
44899 GemmMicrokernelTester()
44900 .extended_weights(true)
44901 .mr(1)
44902 .nr(4)
44903 .kr(8)
44904 .sr(1)
44905 .m(1)
44906 .n(4)
44907 .k(k)
44908 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080044909 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan86746292021-08-06 17:27:18 -070044910 }
44911 }
44912
Marat Dukhandfc2db02021-08-08 21:19:07 -070044913 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_MUL16, k_div_8_subtile) {
Marat Dukhan86746292021-08-06 17:27:18 -070044914 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080044915 for (uint32_t n = 1; n <= 4; n++) {
44916 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan86746292021-08-06 17:27:18 -070044917 GemmMicrokernelTester()
44918 .extended_weights(true)
44919 .mr(1)
44920 .nr(4)
44921 .kr(8)
44922 .sr(1)
44923 .m(m)
44924 .n(n)
44925 .k(k)
44926 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080044927 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan86746292021-08-06 17:27:18 -070044928 }
44929 }
44930 }
44931 }
44932
Marat Dukhandfc2db02021-08-08 21:19:07 -070044933 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_MUL16, n_gt_4) {
Marat Dukhan86746292021-08-06 17:27:18 -070044934 for (uint32_t n = 5; n < 8; n++) {
44935 for (size_t k = 1; k <= 40; k += 9) {
44936 GemmMicrokernelTester()
44937 .extended_weights(true)
44938 .mr(1)
44939 .nr(4)
44940 .kr(8)
44941 .sr(1)
44942 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080044943 .n(n)
Marat Dukhan86746292021-08-06 17:27:18 -070044944 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080044945 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan86746292021-08-06 17:27:18 -070044946 }
44947 }
44948 }
44949
Marat Dukhandfc2db02021-08-08 21:19:07 -070044950 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_MUL16, n_gt_4_strided_cn) {
Marat Dukhan86746292021-08-06 17:27:18 -070044951 for (uint32_t n = 5; n < 8; n++) {
44952 for (size_t k = 1; k <= 40; k += 9) {
44953 GemmMicrokernelTester()
44954 .extended_weights(true)
44955 .mr(1)
44956 .nr(4)
44957 .kr(8)
44958 .sr(1)
44959 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080044960 .n(n)
Marat Dukhan86746292021-08-06 17:27:18 -070044961 .k(k)
44962 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080044963 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan86746292021-08-06 17:27:18 -070044964 }
44965 }
44966 }
44967
Marat Dukhandfc2db02021-08-08 21:19:07 -070044968 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_MUL16, n_gt_4_strided_a) {
Marat Dukhan86746292021-08-06 17:27:18 -070044969 for (uint32_t n = 5; n < 8; n++) {
44970 for (size_t k = 1; k <= 40; k += 9) {
44971 GemmMicrokernelTester()
44972 .extended_weights(true)
44973 .mr(1)
44974 .nr(4)
44975 .kr(8)
44976 .sr(1)
44977 .m(1)
44978 .n(n)
44979 .k(k)
44980 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080044981 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan86746292021-08-06 17:27:18 -070044982 }
44983 }
44984 }
44985
Marat Dukhandfc2db02021-08-08 21:19:07 -070044986 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_MUL16, n_gt_4_subtile) {
Marat Dukhan86746292021-08-06 17:27:18 -070044987 for (uint32_t n = 5; n < 8; n++) {
44988 for (size_t k = 1; k <= 40; k += 9) {
44989 for (uint32_t m = 1; m <= 1; m++) {
44990 GemmMicrokernelTester()
44991 .extended_weights(true)
44992 .mr(1)
44993 .nr(4)
44994 .kr(8)
44995 .sr(1)
44996 .m(m)
44997 .n(n)
44998 .k(k)
44999 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080045000 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan86746292021-08-06 17:27:18 -070045001 }
45002 }
45003 }
45004 }
45005
Marat Dukhandfc2db02021-08-08 21:19:07 -070045006 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_MUL16, n_div_4) {
Marat Dukhan86746292021-08-06 17:27:18 -070045007 for (uint32_t n = 8; n <= 12; n += 4) {
45008 for (size_t k = 1; k <= 40; k += 9) {
45009 GemmMicrokernelTester()
45010 .extended_weights(true)
45011 .mr(1)
45012 .nr(4)
45013 .kr(8)
45014 .sr(1)
45015 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080045016 .n(n)
Marat Dukhan86746292021-08-06 17:27:18 -070045017 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080045018 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan86746292021-08-06 17:27:18 -070045019 }
45020 }
45021 }
45022
Marat Dukhandfc2db02021-08-08 21:19:07 -070045023 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_MUL16, n_div_4_strided_cn) {
Marat Dukhan86746292021-08-06 17:27:18 -070045024 for (uint32_t n = 8; n <= 12; n += 4) {
45025 for (size_t k = 1; k <= 40; k += 9) {
45026 GemmMicrokernelTester()
45027 .extended_weights(true)
45028 .mr(1)
45029 .nr(4)
45030 .kr(8)
45031 .sr(1)
45032 .m(1)
45033 .n(n)
45034 .k(k)
45035 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080045036 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan86746292021-08-06 17:27:18 -070045037 }
45038 }
45039 }
45040
Marat Dukhandfc2db02021-08-08 21:19:07 -070045041 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_MUL16, n_div_4_strided_a) {
Marat Dukhan86746292021-08-06 17:27:18 -070045042 for (uint32_t n = 8; n <= 12; n += 4) {
45043 for (size_t k = 1; k <= 40; k += 9) {
45044 GemmMicrokernelTester()
45045 .extended_weights(true)
45046 .mr(1)
45047 .nr(4)
45048 .kr(8)
45049 .sr(1)
45050 .m(1)
45051 .n(n)
45052 .k(k)
45053 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080045054 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan86746292021-08-06 17:27:18 -070045055 }
45056 }
45057 }
45058
Marat Dukhandfc2db02021-08-08 21:19:07 -070045059 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_MUL16, n_div_4_subtile) {
Marat Dukhan86746292021-08-06 17:27:18 -070045060 for (uint32_t n = 8; n <= 12; n += 4) {
45061 for (size_t k = 1; k <= 40; k += 9) {
45062 for (uint32_t m = 1; m <= 1; m++) {
45063 GemmMicrokernelTester()
45064 .extended_weights(true)
45065 .mr(1)
45066 .nr(4)
45067 .kr(8)
45068 .sr(1)
45069 .m(m)
45070 .n(n)
45071 .k(k)
45072 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080045073 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan86746292021-08-06 17:27:18 -070045074 }
45075 }
45076 }
45077 }
45078
Marat Dukhandfc2db02021-08-08 21:19:07 -070045079 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_MUL16, strided_cm_subtile) {
Marat Dukhan86746292021-08-06 17:27:18 -070045080 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080045081 for (uint32_t n = 1; n <= 4; n++) {
45082 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan86746292021-08-06 17:27:18 -070045083 GemmMicrokernelTester()
45084 .extended_weights(true)
45085 .mr(1)
45086 .nr(4)
45087 .kr(8)
45088 .sr(1)
45089 .m(m)
45090 .n(n)
45091 .k(k)
45092 .cm_stride(7)
45093 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080045094 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan86746292021-08-06 17:27:18 -070045095 }
45096 }
45097 }
45098 }
45099
Marat Dukhandfc2db02021-08-08 21:19:07 -070045100 TEST(QS8_GEMM_XW_MINMAX_FP32_1X4C8__WASMSIMD_MUL16, strided_cm) {
Marat Dukhan86746292021-08-06 17:27:18 -070045101 GemmMicrokernelTester()
45102 .extended_weights(true)
45103 .mr(1)
45104 .nr(4)
45105 .kr(8)
45106 .sr(1)
45107 .m(1)
45108 .n(4)
45109 .k(8)
45110 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080045111 .Test(xnn_qs8_gemm_xw_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16, xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
Marat Dukhan86746292021-08-06 17:27:18 -070045112 }
Marat Dukhan4c617792021-12-21 15:47:58 -080045113#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan86746292021-08-06 17:27:18 -070045114
45115
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045116#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
45117 TEST(QS8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_eq_1) {
45118 GemmMicrokernelTester()
45119 .mr(3)
45120 .nr(2)
45121 .kr(1)
45122 .sr(1)
45123 .m(3)
45124 .n(2)
45125 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080045126 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045127 }
45128
45129 TEST(QS8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, strided_cn) {
45130 GemmMicrokernelTester()
45131 .mr(3)
45132 .nr(2)
45133 .kr(1)
45134 .sr(1)
45135 .m(3)
45136 .n(2)
45137 .k(1)
45138 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080045139 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045140 }
45141
45142 TEST(QS8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_eq_1_strided_a) {
45143 GemmMicrokernelTester()
45144 .mr(3)
45145 .nr(2)
45146 .kr(1)
45147 .sr(1)
45148 .m(3)
45149 .n(2)
45150 .k(1)
45151 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080045152 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045153 }
45154
45155 TEST(QS8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080045156 for (uint32_t n = 1; n <= 2; n++) {
45157 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045158 GemmMicrokernelTester()
45159 .mr(3)
45160 .nr(2)
45161 .kr(1)
45162 .sr(1)
45163 .m(m)
45164 .n(n)
45165 .k(1)
45166 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080045167 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045168 }
45169 }
45170 }
45171
45172 TEST(QS8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_eq_1_subtile_m) {
45173 for (uint32_t m = 1; m <= 3; m++) {
45174 GemmMicrokernelTester()
45175 .mr(3)
45176 .nr(2)
45177 .kr(1)
45178 .sr(1)
45179 .m(m)
45180 .n(2)
45181 .k(1)
45182 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080045183 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045184 }
45185 }
45186
45187 TEST(QS8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_eq_1_subtile_n) {
45188 for (uint32_t n = 1; n <= 2; n++) {
45189 GemmMicrokernelTester()
45190 .mr(3)
45191 .nr(2)
45192 .kr(1)
45193 .sr(1)
45194 .m(3)
45195 .n(n)
45196 .k(1)
45197 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080045198 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045199 }
45200 }
45201
45202 TEST(QS8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_gt_1) {
45203 for (size_t k = 2; k < 10; k++) {
45204 GemmMicrokernelTester()
45205 .mr(3)
45206 .nr(2)
45207 .kr(1)
45208 .sr(1)
45209 .m(3)
45210 .n(2)
45211 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080045212 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045213 }
45214 }
45215
45216 TEST(QS8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_gt_1_strided_a) {
45217 for (size_t k = 2; k < 10; k++) {
45218 GemmMicrokernelTester()
45219 .mr(3)
45220 .nr(2)
45221 .kr(1)
45222 .sr(1)
45223 .m(3)
45224 .n(2)
45225 .k(k)
45226 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080045227 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045228 }
45229 }
45230
45231 TEST(QS8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_gt_1_subtile) {
45232 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080045233 for (uint32_t n = 1; n <= 2; n++) {
45234 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045235 GemmMicrokernelTester()
45236 .mr(3)
45237 .nr(2)
45238 .kr(1)
45239 .sr(1)
45240 .m(m)
45241 .n(n)
45242 .k(k)
45243 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080045244 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045245 }
45246 }
45247 }
45248 }
45249
45250 TEST(QS8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_gt_2) {
45251 for (uint32_t n = 3; n < 4; n++) {
45252 for (size_t k = 1; k <= 5; k += 2) {
45253 GemmMicrokernelTester()
45254 .mr(3)
45255 .nr(2)
45256 .kr(1)
45257 .sr(1)
45258 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080045259 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045260 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080045261 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045262 }
45263 }
45264 }
45265
45266 TEST(QS8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_gt_2_strided_cn) {
45267 for (uint32_t n = 3; n < 4; n++) {
45268 for (size_t k = 1; k <= 5; k += 2) {
45269 GemmMicrokernelTester()
45270 .mr(3)
45271 .nr(2)
45272 .kr(1)
45273 .sr(1)
45274 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080045275 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045276 .k(k)
45277 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080045278 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045279 }
45280 }
45281 }
45282
45283 TEST(QS8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_gt_2_strided_a) {
45284 for (uint32_t n = 3; n < 4; n++) {
45285 for (size_t k = 1; k <= 5; k += 2) {
45286 GemmMicrokernelTester()
45287 .mr(3)
45288 .nr(2)
45289 .kr(1)
45290 .sr(1)
45291 .m(3)
45292 .n(n)
45293 .k(k)
45294 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080045295 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045296 }
45297 }
45298 }
45299
45300 TEST(QS8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_gt_2_subtile) {
45301 for (uint32_t n = 3; n < 4; n++) {
45302 for (size_t k = 1; k <= 5; k += 2) {
45303 for (uint32_t m = 1; m <= 3; m++) {
45304 GemmMicrokernelTester()
45305 .mr(3)
45306 .nr(2)
45307 .kr(1)
45308 .sr(1)
45309 .m(m)
45310 .n(n)
45311 .k(k)
45312 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080045313 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045314 }
45315 }
45316 }
45317 }
45318
45319 TEST(QS8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_div_2) {
45320 for (uint32_t n = 4; n <= 6; n += 2) {
45321 for (size_t k = 1; k <= 5; k += 2) {
45322 GemmMicrokernelTester()
45323 .mr(3)
45324 .nr(2)
45325 .kr(1)
45326 .sr(1)
45327 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080045328 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045329 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080045330 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045331 }
45332 }
45333 }
45334
45335 TEST(QS8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_div_2_strided_cn) {
45336 for (uint32_t n = 4; n <= 6; n += 2) {
45337 for (size_t k = 1; k <= 5; k += 2) {
45338 GemmMicrokernelTester()
45339 .mr(3)
45340 .nr(2)
45341 .kr(1)
45342 .sr(1)
45343 .m(3)
45344 .n(n)
45345 .k(k)
45346 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080045347 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045348 }
45349 }
45350 }
45351
45352 TEST(QS8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_div_2_strided_a) {
45353 for (uint32_t n = 4; n <= 6; n += 2) {
45354 for (size_t k = 1; k <= 5; k += 2) {
45355 GemmMicrokernelTester()
45356 .mr(3)
45357 .nr(2)
45358 .kr(1)
45359 .sr(1)
45360 .m(3)
45361 .n(n)
45362 .k(k)
45363 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080045364 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045365 }
45366 }
45367 }
45368
45369 TEST(QS8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_div_2_subtile) {
45370 for (uint32_t n = 4; n <= 6; n += 2) {
45371 for (size_t k = 1; k <= 5; k += 2) {
45372 for (uint32_t m = 1; m <= 3; m++) {
45373 GemmMicrokernelTester()
45374 .mr(3)
45375 .nr(2)
45376 .kr(1)
45377 .sr(1)
45378 .m(m)
45379 .n(n)
45380 .k(k)
45381 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080045382 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045383 }
45384 }
45385 }
45386 }
45387
45388 TEST(QS8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, strided_cm_subtile) {
45389 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080045390 for (uint32_t n = 1; n <= 2; n++) {
45391 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045392 GemmMicrokernelTester()
45393 .mr(3)
45394 .nr(2)
45395 .kr(1)
45396 .sr(1)
45397 .m(m)
45398 .n(n)
45399 .k(k)
45400 .cm_stride(5)
45401 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080045402 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045403 }
45404 }
45405 }
45406 }
45407
45408 TEST(QS8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, qmin) {
45409 GemmMicrokernelTester()
45410 .mr(3)
45411 .nr(2)
45412 .kr(1)
45413 .sr(1)
45414 .m(3)
45415 .n(2)
45416 .k(1)
45417 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080045418 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045419 }
45420
45421 TEST(QS8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, qmax) {
45422 GemmMicrokernelTester()
45423 .mr(3)
45424 .nr(2)
45425 .kr(1)
45426 .sr(1)
45427 .m(3)
45428 .n(2)
45429 .k(1)
45430 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080045431 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045432 }
45433
45434 TEST(QS8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, strided_cm) {
45435 GemmMicrokernelTester()
45436 .mr(3)
45437 .nr(2)
45438 .kr(1)
45439 .sr(1)
45440 .m(3)
45441 .n(2)
45442 .k(1)
45443 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080045444 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045445 }
45446#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
45447
45448
45449#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045450 TEST(QS8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_eq_1) {
45451 GemmMicrokernelTester()
45452 .mr(3)
45453 .nr(4)
45454 .kr(1)
45455 .sr(1)
45456 .m(3)
45457 .n(4)
45458 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080045459 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045460 }
45461
45462 TEST(QS8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, strided_cn) {
45463 GemmMicrokernelTester()
45464 .mr(3)
45465 .nr(4)
45466 .kr(1)
45467 .sr(1)
45468 .m(3)
45469 .n(4)
45470 .k(1)
45471 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080045472 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045473 }
45474
45475 TEST(QS8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_eq_1_strided_a) {
45476 GemmMicrokernelTester()
45477 .mr(3)
45478 .nr(4)
45479 .kr(1)
45480 .sr(1)
45481 .m(3)
45482 .n(4)
45483 .k(1)
45484 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080045485 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045486 }
45487
45488 TEST(QS8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080045489 for (uint32_t n = 1; n <= 4; n++) {
45490 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045491 GemmMicrokernelTester()
45492 .mr(3)
45493 .nr(4)
45494 .kr(1)
45495 .sr(1)
45496 .m(m)
45497 .n(n)
45498 .k(1)
45499 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080045500 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045501 }
45502 }
45503 }
45504
45505 TEST(QS8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_eq_1_subtile_m) {
45506 for (uint32_t m = 1; m <= 3; m++) {
45507 GemmMicrokernelTester()
45508 .mr(3)
45509 .nr(4)
45510 .kr(1)
45511 .sr(1)
45512 .m(m)
45513 .n(4)
45514 .k(1)
45515 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080045516 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045517 }
45518 }
45519
45520 TEST(QS8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_eq_1_subtile_n) {
45521 for (uint32_t n = 1; n <= 4; n++) {
45522 GemmMicrokernelTester()
45523 .mr(3)
45524 .nr(4)
45525 .kr(1)
45526 .sr(1)
45527 .m(3)
45528 .n(n)
45529 .k(1)
45530 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080045531 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045532 }
45533 }
45534
45535 TEST(QS8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_gt_1) {
45536 for (size_t k = 2; k < 10; k++) {
45537 GemmMicrokernelTester()
45538 .mr(3)
45539 .nr(4)
45540 .kr(1)
45541 .sr(1)
45542 .m(3)
45543 .n(4)
45544 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080045545 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045546 }
45547 }
45548
45549 TEST(QS8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_gt_1_strided_a) {
45550 for (size_t k = 2; k < 10; k++) {
45551 GemmMicrokernelTester()
45552 .mr(3)
45553 .nr(4)
45554 .kr(1)
45555 .sr(1)
45556 .m(3)
45557 .n(4)
45558 .k(k)
45559 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080045560 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045561 }
45562 }
45563
45564 TEST(QS8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_gt_1_subtile) {
45565 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080045566 for (uint32_t n = 1; n <= 4; n++) {
45567 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045568 GemmMicrokernelTester()
45569 .mr(3)
45570 .nr(4)
45571 .kr(1)
45572 .sr(1)
45573 .m(m)
45574 .n(n)
45575 .k(k)
45576 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080045577 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045578 }
45579 }
45580 }
45581 }
45582
45583 TEST(QS8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_gt_4) {
45584 for (uint32_t n = 5; n < 8; n++) {
45585 for (size_t k = 1; k <= 5; k += 2) {
45586 GemmMicrokernelTester()
45587 .mr(3)
45588 .nr(4)
45589 .kr(1)
45590 .sr(1)
45591 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080045592 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045593 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080045594 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045595 }
45596 }
45597 }
45598
45599 TEST(QS8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_gt_4_strided_cn) {
45600 for (uint32_t n = 5; n < 8; n++) {
45601 for (size_t k = 1; k <= 5; k += 2) {
45602 GemmMicrokernelTester()
45603 .mr(3)
45604 .nr(4)
45605 .kr(1)
45606 .sr(1)
45607 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080045608 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045609 .k(k)
45610 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080045611 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045612 }
45613 }
45614 }
45615
45616 TEST(QS8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_gt_4_strided_a) {
45617 for (uint32_t n = 5; n < 8; n++) {
45618 for (size_t k = 1; k <= 5; k += 2) {
45619 GemmMicrokernelTester()
45620 .mr(3)
45621 .nr(4)
45622 .kr(1)
45623 .sr(1)
45624 .m(3)
45625 .n(n)
45626 .k(k)
45627 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080045628 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045629 }
45630 }
45631 }
45632
45633 TEST(QS8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_gt_4_subtile) {
45634 for (uint32_t n = 5; n < 8; n++) {
45635 for (size_t k = 1; k <= 5; k += 2) {
45636 for (uint32_t m = 1; m <= 3; m++) {
45637 GemmMicrokernelTester()
45638 .mr(3)
45639 .nr(4)
45640 .kr(1)
45641 .sr(1)
45642 .m(m)
45643 .n(n)
45644 .k(k)
45645 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080045646 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045647 }
45648 }
45649 }
45650 }
45651
45652 TEST(QS8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_div_4) {
45653 for (uint32_t n = 8; n <= 12; n += 4) {
45654 for (size_t k = 1; k <= 5; k += 2) {
45655 GemmMicrokernelTester()
45656 .mr(3)
45657 .nr(4)
45658 .kr(1)
45659 .sr(1)
45660 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080045661 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045662 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080045663 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045664 }
45665 }
45666 }
45667
45668 TEST(QS8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_div_4_strided_cn) {
45669 for (uint32_t n = 8; n <= 12; n += 4) {
45670 for (size_t k = 1; k <= 5; k += 2) {
45671 GemmMicrokernelTester()
45672 .mr(3)
45673 .nr(4)
45674 .kr(1)
45675 .sr(1)
45676 .m(3)
45677 .n(n)
45678 .k(k)
45679 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080045680 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045681 }
45682 }
45683 }
45684
45685 TEST(QS8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_div_4_strided_a) {
45686 for (uint32_t n = 8; n <= 12; n += 4) {
45687 for (size_t k = 1; k <= 5; k += 2) {
45688 GemmMicrokernelTester()
45689 .mr(3)
45690 .nr(4)
45691 .kr(1)
45692 .sr(1)
45693 .m(3)
45694 .n(n)
45695 .k(k)
45696 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080045697 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045698 }
45699 }
45700 }
45701
45702 TEST(QS8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_div_4_subtile) {
45703 for (uint32_t n = 8; n <= 12; n += 4) {
45704 for (size_t k = 1; k <= 5; k += 2) {
45705 for (uint32_t m = 1; m <= 3; m++) {
45706 GemmMicrokernelTester()
45707 .mr(3)
45708 .nr(4)
45709 .kr(1)
45710 .sr(1)
45711 .m(m)
45712 .n(n)
45713 .k(k)
45714 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080045715 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045716 }
45717 }
45718 }
45719 }
45720
45721 TEST(QS8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, strided_cm_subtile) {
45722 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080045723 for (uint32_t n = 1; n <= 4; n++) {
45724 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045725 GemmMicrokernelTester()
45726 .mr(3)
45727 .nr(4)
45728 .kr(1)
45729 .sr(1)
45730 .m(m)
45731 .n(n)
45732 .k(k)
45733 .cm_stride(7)
45734 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080045735 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045736 }
45737 }
45738 }
45739 }
45740
45741 TEST(QS8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, qmin) {
45742 GemmMicrokernelTester()
45743 .mr(3)
45744 .nr(4)
45745 .kr(1)
45746 .sr(1)
45747 .m(3)
45748 .n(4)
45749 .k(1)
45750 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080045751 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045752 }
45753
45754 TEST(QS8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, qmax) {
45755 GemmMicrokernelTester()
45756 .mr(3)
45757 .nr(4)
45758 .kr(1)
45759 .sr(1)
45760 .m(3)
45761 .n(4)
45762 .k(1)
45763 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080045764 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045765 }
45766
45767 TEST(QS8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, strided_cm) {
45768 GemmMicrokernelTester()
45769 .mr(3)
45770 .nr(4)
45771 .kr(1)
45772 .sr(1)
45773 .m(3)
45774 .n(4)
45775 .k(1)
45776 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080045777 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080045778 }
45779#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
45780
45781
Marat Dukhan2ac722e2022-01-04 01:54:20 -080045782TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1) {
Marat Dukhan779b2532021-06-29 14:14:13 -070045783 GemmMicrokernelTester()
45784 .mr(1)
45785 .nr(2)
45786 .kr(1)
45787 .sr(1)
45788 .m(1)
45789 .n(2)
45790 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080045791 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070045792}
45793
Marat Dukhan2ac722e2022-01-04 01:54:20 -080045794TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, strided_cn) {
Marat Dukhan779b2532021-06-29 14:14:13 -070045795 GemmMicrokernelTester()
45796 .mr(1)
45797 .nr(2)
45798 .kr(1)
45799 .sr(1)
45800 .m(1)
45801 .n(2)
45802 .k(1)
45803 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080045804 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070045805}
45806
Marat Dukhan2ac722e2022-01-04 01:54:20 -080045807TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_strided_a) {
Marat Dukhan779b2532021-06-29 14:14:13 -070045808 GemmMicrokernelTester()
45809 .mr(1)
45810 .nr(2)
45811 .kr(1)
45812 .sr(1)
45813 .m(1)
45814 .n(2)
45815 .k(1)
45816 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080045817 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070045818}
45819
Marat Dukhan2ac722e2022-01-04 01:54:20 -080045820TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080045821 for (uint32_t n = 1; n <= 2; n++) {
45822 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070045823 GemmMicrokernelTester()
45824 .mr(1)
45825 .nr(2)
45826 .kr(1)
45827 .sr(1)
45828 .m(m)
45829 .n(n)
45830 .k(1)
45831 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080045832 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070045833 }
45834 }
45835}
45836
Marat Dukhan2ac722e2022-01-04 01:54:20 -080045837TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
Marat Dukhan779b2532021-06-29 14:14:13 -070045838 for (uint32_t m = 1; m <= 1; m++) {
45839 GemmMicrokernelTester()
45840 .mr(1)
45841 .nr(2)
45842 .kr(1)
45843 .sr(1)
45844 .m(m)
45845 .n(2)
45846 .k(1)
45847 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080045848 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070045849 }
45850}
45851
Marat Dukhan2ac722e2022-01-04 01:54:20 -080045852TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
Marat Dukhan779b2532021-06-29 14:14:13 -070045853 for (uint32_t n = 1; n <= 2; n++) {
45854 GemmMicrokernelTester()
45855 .mr(1)
45856 .nr(2)
45857 .kr(1)
45858 .sr(1)
45859 .m(1)
45860 .n(n)
45861 .k(1)
45862 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080045863 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070045864 }
45865}
45866
Marat Dukhan2ac722e2022-01-04 01:54:20 -080045867TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_gt_1) {
Marat Dukhan779b2532021-06-29 14:14:13 -070045868 for (size_t k = 2; k < 10; k++) {
45869 GemmMicrokernelTester()
45870 .mr(1)
45871 .nr(2)
45872 .kr(1)
45873 .sr(1)
45874 .m(1)
45875 .n(2)
45876 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080045877 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070045878 }
45879}
45880
Marat Dukhan2ac722e2022-01-04 01:54:20 -080045881TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_gt_1_strided_a) {
Marat Dukhan779b2532021-06-29 14:14:13 -070045882 for (size_t k = 2; k < 10; k++) {
45883 GemmMicrokernelTester()
45884 .mr(1)
45885 .nr(2)
45886 .kr(1)
45887 .sr(1)
45888 .m(1)
45889 .n(2)
45890 .k(k)
45891 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080045892 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070045893 }
45894}
45895
Marat Dukhan2ac722e2022-01-04 01:54:20 -080045896TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_gt_1_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070045897 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080045898 for (uint32_t n = 1; n <= 2; n++) {
45899 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070045900 GemmMicrokernelTester()
45901 .mr(1)
45902 .nr(2)
45903 .kr(1)
45904 .sr(1)
45905 .m(m)
45906 .n(n)
45907 .k(k)
45908 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080045909 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070045910 }
45911 }
45912 }
45913}
45914
Marat Dukhan2ac722e2022-01-04 01:54:20 -080045915TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2) {
Marat Dukhan779b2532021-06-29 14:14:13 -070045916 for (uint32_t n = 3; n < 4; n++) {
45917 for (size_t k = 1; k <= 5; k += 2) {
45918 GemmMicrokernelTester()
45919 .mr(1)
45920 .nr(2)
45921 .kr(1)
45922 .sr(1)
45923 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080045924 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070045925 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080045926 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070045927 }
45928 }
45929}
45930
Marat Dukhan2ac722e2022-01-04 01:54:20 -080045931TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
Marat Dukhan779b2532021-06-29 14:14:13 -070045932 for (uint32_t n = 3; n < 4; n++) {
45933 for (size_t k = 1; k <= 5; k += 2) {
45934 GemmMicrokernelTester()
45935 .mr(1)
45936 .nr(2)
45937 .kr(1)
45938 .sr(1)
45939 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080045940 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070045941 .k(k)
45942 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080045943 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070045944 }
45945 }
45946}
45947
Marat Dukhan2ac722e2022-01-04 01:54:20 -080045948TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2_strided_a) {
Marat Dukhan779b2532021-06-29 14:14:13 -070045949 for (uint32_t n = 3; n < 4; n++) {
45950 for (size_t k = 1; k <= 5; k += 2) {
45951 GemmMicrokernelTester()
45952 .mr(1)
45953 .nr(2)
45954 .kr(1)
45955 .sr(1)
45956 .m(1)
45957 .n(n)
45958 .k(k)
45959 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080045960 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070045961 }
45962 }
45963}
45964
Marat Dukhan2ac722e2022-01-04 01:54:20 -080045965TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070045966 for (uint32_t n = 3; n < 4; n++) {
45967 for (size_t k = 1; k <= 5; k += 2) {
45968 for (uint32_t m = 1; m <= 1; m++) {
45969 GemmMicrokernelTester()
45970 .mr(1)
45971 .nr(2)
45972 .kr(1)
45973 .sr(1)
45974 .m(m)
45975 .n(n)
45976 .k(k)
45977 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080045978 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070045979 }
45980 }
45981 }
45982}
45983
Marat Dukhan2ac722e2022-01-04 01:54:20 -080045984TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2) {
Marat Dukhan779b2532021-06-29 14:14:13 -070045985 for (uint32_t n = 4; n <= 6; n += 2) {
45986 for (size_t k = 1; k <= 5; k += 2) {
45987 GemmMicrokernelTester()
45988 .mr(1)
45989 .nr(2)
45990 .kr(1)
45991 .sr(1)
45992 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080045993 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070045994 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080045995 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070045996 }
45997 }
45998}
45999
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046000TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046001 for (uint32_t n = 4; n <= 6; n += 2) {
46002 for (size_t k = 1; k <= 5; k += 2) {
46003 GemmMicrokernelTester()
46004 .mr(1)
46005 .nr(2)
46006 .kr(1)
46007 .sr(1)
46008 .m(1)
46009 .n(n)
46010 .k(k)
46011 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080046012 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046013 }
46014 }
46015}
46016
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046017TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2_strided_a) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046018 for (uint32_t n = 4; n <= 6; n += 2) {
46019 for (size_t k = 1; k <= 5; k += 2) {
46020 GemmMicrokernelTester()
46021 .mr(1)
46022 .nr(2)
46023 .kr(1)
46024 .sr(1)
46025 .m(1)
46026 .n(n)
46027 .k(k)
46028 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080046029 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046030 }
46031 }
46032}
46033
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046034TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046035 for (uint32_t n = 4; n <= 6; n += 2) {
46036 for (size_t k = 1; k <= 5; k += 2) {
46037 for (uint32_t m = 1; m <= 1; m++) {
46038 GemmMicrokernelTester()
46039 .mr(1)
46040 .nr(2)
46041 .kr(1)
46042 .sr(1)
46043 .m(m)
46044 .n(n)
46045 .k(k)
46046 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080046047 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046048 }
46049 }
46050 }
46051}
46052
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046053TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, strided_cm_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046054 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080046055 for (uint32_t n = 1; n <= 2; n++) {
46056 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046057 GemmMicrokernelTester()
46058 .mr(1)
46059 .nr(2)
46060 .kr(1)
46061 .sr(1)
46062 .m(m)
46063 .n(n)
46064 .k(k)
46065 .cm_stride(5)
46066 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080046067 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046068 }
46069 }
46070 }
46071}
46072
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046073TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, qmin) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046074 GemmMicrokernelTester()
46075 .mr(1)
46076 .nr(2)
46077 .kr(1)
46078 .sr(1)
46079 .m(1)
46080 .n(2)
46081 .k(1)
46082 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080046083 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046084}
46085
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046086TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, qmax) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046087 GemmMicrokernelTester()
46088 .mr(1)
46089 .nr(2)
46090 .kr(1)
46091 .sr(1)
46092 .m(1)
46093 .n(2)
46094 .k(1)
46095 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080046096 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046097}
46098
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046099TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, strided_cm) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046100 GemmMicrokernelTester()
46101 .mr(1)
46102 .nr(2)
46103 .kr(1)
46104 .sr(1)
46105 .m(1)
46106 .n(2)
46107 .k(1)
46108 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080046109 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046110}
46111
46112
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046113TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046114 GemmMicrokernelTester()
46115 .mr(2)
46116 .nr(2)
46117 .kr(1)
46118 .sr(1)
46119 .m(2)
46120 .n(2)
46121 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080046122 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046123}
46124
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046125TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cn) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046126 GemmMicrokernelTester()
46127 .mr(2)
46128 .nr(2)
46129 .kr(1)
46130 .sr(1)
46131 .m(2)
46132 .n(2)
46133 .k(1)
46134 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080046135 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046136}
46137
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046138TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_strided_a) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046139 GemmMicrokernelTester()
46140 .mr(2)
46141 .nr(2)
46142 .kr(1)
46143 .sr(1)
46144 .m(2)
46145 .n(2)
46146 .k(1)
46147 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080046148 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046149}
46150
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046151TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080046152 for (uint32_t n = 1; n <= 2; n++) {
46153 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046154 GemmMicrokernelTester()
46155 .mr(2)
46156 .nr(2)
46157 .kr(1)
46158 .sr(1)
46159 .m(m)
46160 .n(n)
46161 .k(1)
46162 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080046163 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046164 }
46165 }
46166}
46167
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046168TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046169 for (uint32_t m = 1; m <= 2; m++) {
46170 GemmMicrokernelTester()
46171 .mr(2)
46172 .nr(2)
46173 .kr(1)
46174 .sr(1)
46175 .m(m)
46176 .n(2)
46177 .k(1)
46178 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080046179 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046180 }
46181}
46182
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046183TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046184 for (uint32_t n = 1; n <= 2; n++) {
46185 GemmMicrokernelTester()
46186 .mr(2)
46187 .nr(2)
46188 .kr(1)
46189 .sr(1)
46190 .m(2)
46191 .n(n)
46192 .k(1)
46193 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080046194 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046195 }
46196}
46197
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046198TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_gt_1) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046199 for (size_t k = 2; k < 10; k++) {
46200 GemmMicrokernelTester()
46201 .mr(2)
46202 .nr(2)
46203 .kr(1)
46204 .sr(1)
46205 .m(2)
46206 .n(2)
46207 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080046208 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046209 }
46210}
46211
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046212TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_gt_1_strided_a) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046213 for (size_t k = 2; k < 10; k++) {
46214 GemmMicrokernelTester()
46215 .mr(2)
46216 .nr(2)
46217 .kr(1)
46218 .sr(1)
46219 .m(2)
46220 .n(2)
46221 .k(k)
46222 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080046223 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046224 }
46225}
46226
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046227TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_gt_1_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046228 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080046229 for (uint32_t n = 1; n <= 2; n++) {
46230 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046231 GemmMicrokernelTester()
46232 .mr(2)
46233 .nr(2)
46234 .kr(1)
46235 .sr(1)
46236 .m(m)
46237 .n(n)
46238 .k(k)
46239 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080046240 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046241 }
46242 }
46243 }
46244}
46245
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046246TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046247 for (uint32_t n = 3; n < 4; n++) {
46248 for (size_t k = 1; k <= 5; k += 2) {
46249 GemmMicrokernelTester()
46250 .mr(2)
46251 .nr(2)
46252 .kr(1)
46253 .sr(1)
46254 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080046255 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070046256 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080046257 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046258 }
46259 }
46260}
46261
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046262TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046263 for (uint32_t n = 3; n < 4; n++) {
46264 for (size_t k = 1; k <= 5; k += 2) {
46265 GemmMicrokernelTester()
46266 .mr(2)
46267 .nr(2)
46268 .kr(1)
46269 .sr(1)
46270 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080046271 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070046272 .k(k)
46273 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080046274 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046275 }
46276 }
46277}
46278
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046279TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_strided_a) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046280 for (uint32_t n = 3; n < 4; n++) {
46281 for (size_t k = 1; k <= 5; k += 2) {
46282 GemmMicrokernelTester()
46283 .mr(2)
46284 .nr(2)
46285 .kr(1)
46286 .sr(1)
46287 .m(2)
46288 .n(n)
46289 .k(k)
46290 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080046291 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046292 }
46293 }
46294}
46295
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046296TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046297 for (uint32_t n = 3; n < 4; n++) {
46298 for (size_t k = 1; k <= 5; k += 2) {
46299 for (uint32_t m = 1; m <= 2; m++) {
46300 GemmMicrokernelTester()
46301 .mr(2)
46302 .nr(2)
46303 .kr(1)
46304 .sr(1)
46305 .m(m)
46306 .n(n)
46307 .k(k)
46308 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080046309 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046310 }
46311 }
46312 }
46313}
46314
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046315TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046316 for (uint32_t n = 4; n <= 6; n += 2) {
46317 for (size_t k = 1; k <= 5; k += 2) {
46318 GemmMicrokernelTester()
46319 .mr(2)
46320 .nr(2)
46321 .kr(1)
46322 .sr(1)
46323 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080046324 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070046325 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080046326 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046327 }
46328 }
46329}
46330
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046331TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046332 for (uint32_t n = 4; n <= 6; n += 2) {
46333 for (size_t k = 1; k <= 5; k += 2) {
46334 GemmMicrokernelTester()
46335 .mr(2)
46336 .nr(2)
46337 .kr(1)
46338 .sr(1)
46339 .m(2)
46340 .n(n)
46341 .k(k)
46342 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080046343 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046344 }
46345 }
46346}
46347
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046348TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_strided_a) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046349 for (uint32_t n = 4; n <= 6; n += 2) {
46350 for (size_t k = 1; k <= 5; k += 2) {
46351 GemmMicrokernelTester()
46352 .mr(2)
46353 .nr(2)
46354 .kr(1)
46355 .sr(1)
46356 .m(2)
46357 .n(n)
46358 .k(k)
46359 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080046360 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046361 }
46362 }
46363}
46364
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046365TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046366 for (uint32_t n = 4; n <= 6; n += 2) {
46367 for (size_t k = 1; k <= 5; k += 2) {
46368 for (uint32_t m = 1; m <= 2; m++) {
46369 GemmMicrokernelTester()
46370 .mr(2)
46371 .nr(2)
46372 .kr(1)
46373 .sr(1)
46374 .m(m)
46375 .n(n)
46376 .k(k)
46377 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080046378 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046379 }
46380 }
46381 }
46382}
46383
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046384TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cm_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046385 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080046386 for (uint32_t n = 1; n <= 2; n++) {
46387 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046388 GemmMicrokernelTester()
46389 .mr(2)
46390 .nr(2)
46391 .kr(1)
46392 .sr(1)
46393 .m(m)
46394 .n(n)
46395 .k(k)
46396 .cm_stride(5)
46397 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080046398 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046399 }
46400 }
46401 }
46402}
46403
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046404TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, qmin) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046405 GemmMicrokernelTester()
46406 .mr(2)
46407 .nr(2)
46408 .kr(1)
46409 .sr(1)
46410 .m(2)
46411 .n(2)
46412 .k(1)
46413 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080046414 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046415}
46416
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046417TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, qmax) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046418 GemmMicrokernelTester()
46419 .mr(2)
46420 .nr(2)
46421 .kr(1)
46422 .sr(1)
46423 .m(2)
46424 .n(2)
46425 .k(1)
46426 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080046427 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046428}
46429
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046430TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cm) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046431 GemmMicrokernelTester()
46432 .mr(2)
46433 .nr(2)
46434 .kr(1)
46435 .sr(1)
46436 .m(2)
46437 .n(2)
46438 .k(1)
46439 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080046440 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046441}
46442
46443
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046444TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046445 GemmMicrokernelTester()
46446 .mr(1)
46447 .nr(4)
46448 .kr(1)
46449 .sr(1)
46450 .m(1)
46451 .n(4)
46452 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080046453 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046454}
46455
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046456TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, strided_cn) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046457 GemmMicrokernelTester()
46458 .mr(1)
46459 .nr(4)
46460 .kr(1)
46461 .sr(1)
46462 .m(1)
46463 .n(4)
46464 .k(1)
46465 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080046466 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046467}
46468
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046469TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_strided_a) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046470 GemmMicrokernelTester()
46471 .mr(1)
46472 .nr(4)
46473 .kr(1)
46474 .sr(1)
46475 .m(1)
46476 .n(4)
46477 .k(1)
46478 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080046479 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046480}
46481
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046482TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080046483 for (uint32_t n = 1; n <= 4; n++) {
46484 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046485 GemmMicrokernelTester()
46486 .mr(1)
46487 .nr(4)
46488 .kr(1)
46489 .sr(1)
46490 .m(m)
46491 .n(n)
46492 .k(1)
46493 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080046494 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046495 }
46496 }
46497}
46498
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046499TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046500 for (uint32_t m = 1; m <= 1; m++) {
46501 GemmMicrokernelTester()
46502 .mr(1)
46503 .nr(4)
46504 .kr(1)
46505 .sr(1)
46506 .m(m)
46507 .n(4)
46508 .k(1)
46509 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080046510 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046511 }
46512}
46513
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046514TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046515 for (uint32_t n = 1; n <= 4; n++) {
46516 GemmMicrokernelTester()
46517 .mr(1)
46518 .nr(4)
46519 .kr(1)
46520 .sr(1)
46521 .m(1)
46522 .n(n)
46523 .k(1)
46524 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080046525 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046526 }
46527}
46528
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046529TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_gt_1) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046530 for (size_t k = 2; k < 10; k++) {
46531 GemmMicrokernelTester()
46532 .mr(1)
46533 .nr(4)
46534 .kr(1)
46535 .sr(1)
46536 .m(1)
46537 .n(4)
46538 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080046539 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046540 }
46541}
46542
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046543TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_gt_1_strided_a) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046544 for (size_t k = 2; k < 10; k++) {
46545 GemmMicrokernelTester()
46546 .mr(1)
46547 .nr(4)
46548 .kr(1)
46549 .sr(1)
46550 .m(1)
46551 .n(4)
46552 .k(k)
46553 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080046554 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046555 }
46556}
46557
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046558TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_gt_1_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046559 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080046560 for (uint32_t n = 1; n <= 4; n++) {
46561 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046562 GemmMicrokernelTester()
46563 .mr(1)
46564 .nr(4)
46565 .kr(1)
46566 .sr(1)
46567 .m(m)
46568 .n(n)
46569 .k(k)
46570 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080046571 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046572 }
46573 }
46574 }
46575}
46576
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046577TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046578 for (uint32_t n = 5; n < 8; n++) {
46579 for (size_t k = 1; k <= 5; k += 2) {
46580 GemmMicrokernelTester()
46581 .mr(1)
46582 .nr(4)
46583 .kr(1)
46584 .sr(1)
46585 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080046586 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070046587 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080046588 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046589 }
46590 }
46591}
46592
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046593TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046594 for (uint32_t n = 5; n < 8; n++) {
46595 for (size_t k = 1; k <= 5; k += 2) {
46596 GemmMicrokernelTester()
46597 .mr(1)
46598 .nr(4)
46599 .kr(1)
46600 .sr(1)
46601 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080046602 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070046603 .k(k)
46604 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080046605 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046606 }
46607 }
46608}
46609
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046610TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4_strided_a) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046611 for (uint32_t n = 5; n < 8; n++) {
46612 for (size_t k = 1; k <= 5; k += 2) {
46613 GemmMicrokernelTester()
46614 .mr(1)
46615 .nr(4)
46616 .kr(1)
46617 .sr(1)
46618 .m(1)
46619 .n(n)
46620 .k(k)
46621 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080046622 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046623 }
46624 }
46625}
46626
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046627TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046628 for (uint32_t n = 5; n < 8; n++) {
46629 for (size_t k = 1; k <= 5; k += 2) {
46630 for (uint32_t m = 1; m <= 1; m++) {
46631 GemmMicrokernelTester()
46632 .mr(1)
46633 .nr(4)
46634 .kr(1)
46635 .sr(1)
46636 .m(m)
46637 .n(n)
46638 .k(k)
46639 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080046640 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046641 }
46642 }
46643 }
46644}
46645
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046646TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046647 for (uint32_t n = 8; n <= 12; n += 4) {
46648 for (size_t k = 1; k <= 5; k += 2) {
46649 GemmMicrokernelTester()
46650 .mr(1)
46651 .nr(4)
46652 .kr(1)
46653 .sr(1)
46654 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080046655 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070046656 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080046657 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046658 }
46659 }
46660}
46661
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046662TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046663 for (uint32_t n = 8; n <= 12; n += 4) {
46664 for (size_t k = 1; k <= 5; k += 2) {
46665 GemmMicrokernelTester()
46666 .mr(1)
46667 .nr(4)
46668 .kr(1)
46669 .sr(1)
46670 .m(1)
46671 .n(n)
46672 .k(k)
46673 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080046674 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046675 }
46676 }
46677}
46678
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046679TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4_strided_a) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046680 for (uint32_t n = 8; n <= 12; n += 4) {
46681 for (size_t k = 1; k <= 5; k += 2) {
46682 GemmMicrokernelTester()
46683 .mr(1)
46684 .nr(4)
46685 .kr(1)
46686 .sr(1)
46687 .m(1)
46688 .n(n)
46689 .k(k)
46690 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080046691 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046692 }
46693 }
46694}
46695
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046696TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046697 for (uint32_t n = 8; n <= 12; n += 4) {
46698 for (size_t k = 1; k <= 5; k += 2) {
46699 for (uint32_t m = 1; m <= 1; m++) {
46700 GemmMicrokernelTester()
46701 .mr(1)
46702 .nr(4)
46703 .kr(1)
46704 .sr(1)
46705 .m(m)
46706 .n(n)
46707 .k(k)
46708 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080046709 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046710 }
46711 }
46712 }
46713}
46714
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046715TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, strided_cm_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046716 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080046717 for (uint32_t n = 1; n <= 4; n++) {
46718 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046719 GemmMicrokernelTester()
46720 .mr(1)
46721 .nr(4)
46722 .kr(1)
46723 .sr(1)
46724 .m(m)
46725 .n(n)
46726 .k(k)
46727 .cm_stride(7)
46728 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080046729 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046730 }
46731 }
46732 }
46733}
46734
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046735TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, qmin) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046736 GemmMicrokernelTester()
46737 .mr(1)
46738 .nr(4)
46739 .kr(1)
46740 .sr(1)
46741 .m(1)
46742 .n(4)
46743 .k(1)
46744 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080046745 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046746}
46747
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046748TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, qmax) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046749 GemmMicrokernelTester()
46750 .mr(1)
46751 .nr(4)
46752 .kr(1)
46753 .sr(1)
46754 .m(1)
46755 .n(4)
46756 .k(1)
46757 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080046758 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046759}
46760
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046761TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, strided_cm) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046762 GemmMicrokernelTester()
46763 .mr(1)
46764 .nr(4)
46765 .kr(1)
46766 .sr(1)
46767 .m(1)
46768 .n(4)
46769 .k(1)
46770 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080046771 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046772}
46773
46774
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046775TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046776 GemmMicrokernelTester()
46777 .mr(2)
46778 .nr(4)
46779 .kr(1)
46780 .sr(1)
46781 .m(2)
46782 .n(4)
46783 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080046784 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046785}
46786
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046787TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, strided_cn) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046788 GemmMicrokernelTester()
46789 .mr(2)
46790 .nr(4)
46791 .kr(1)
46792 .sr(1)
46793 .m(2)
46794 .n(4)
46795 .k(1)
46796 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080046797 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046798}
46799
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046800TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_strided_a) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046801 GemmMicrokernelTester()
46802 .mr(2)
46803 .nr(4)
46804 .kr(1)
46805 .sr(1)
46806 .m(2)
46807 .n(4)
46808 .k(1)
46809 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080046810 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046811}
46812
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046813TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080046814 for (uint32_t n = 1; n <= 4; n++) {
46815 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046816 GemmMicrokernelTester()
46817 .mr(2)
46818 .nr(4)
46819 .kr(1)
46820 .sr(1)
46821 .m(m)
46822 .n(n)
46823 .k(1)
46824 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080046825 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046826 }
46827 }
46828}
46829
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046830TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046831 for (uint32_t m = 1; m <= 2; m++) {
46832 GemmMicrokernelTester()
46833 .mr(2)
46834 .nr(4)
46835 .kr(1)
46836 .sr(1)
46837 .m(m)
46838 .n(4)
46839 .k(1)
46840 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080046841 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046842 }
46843}
46844
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046845TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046846 for (uint32_t n = 1; n <= 4; n++) {
46847 GemmMicrokernelTester()
46848 .mr(2)
46849 .nr(4)
46850 .kr(1)
46851 .sr(1)
46852 .m(2)
46853 .n(n)
46854 .k(1)
46855 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080046856 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046857 }
46858}
46859
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046860TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_gt_1) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046861 for (size_t k = 2; k < 10; k++) {
46862 GemmMicrokernelTester()
46863 .mr(2)
46864 .nr(4)
46865 .kr(1)
46866 .sr(1)
46867 .m(2)
46868 .n(4)
46869 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080046870 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046871 }
46872}
46873
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046874TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_gt_1_strided_a) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046875 for (size_t k = 2; k < 10; k++) {
46876 GemmMicrokernelTester()
46877 .mr(2)
46878 .nr(4)
46879 .kr(1)
46880 .sr(1)
46881 .m(2)
46882 .n(4)
46883 .k(k)
46884 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080046885 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046886 }
46887}
46888
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046889TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_gt_1_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046890 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080046891 for (uint32_t n = 1; n <= 4; n++) {
46892 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046893 GemmMicrokernelTester()
46894 .mr(2)
46895 .nr(4)
46896 .kr(1)
46897 .sr(1)
46898 .m(m)
46899 .n(n)
46900 .k(k)
46901 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080046902 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046903 }
46904 }
46905 }
46906}
46907
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046908TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046909 for (uint32_t n = 5; n < 8; n++) {
46910 for (size_t k = 1; k <= 5; k += 2) {
46911 GemmMicrokernelTester()
46912 .mr(2)
46913 .nr(4)
46914 .kr(1)
46915 .sr(1)
46916 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080046917 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070046918 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080046919 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046920 }
46921 }
46922}
46923
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046924TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046925 for (uint32_t n = 5; n < 8; n++) {
46926 for (size_t k = 1; k <= 5; k += 2) {
46927 GemmMicrokernelTester()
46928 .mr(2)
46929 .nr(4)
46930 .kr(1)
46931 .sr(1)
46932 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080046933 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070046934 .k(k)
46935 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080046936 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046937 }
46938 }
46939}
46940
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046941TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4_strided_a) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046942 for (uint32_t n = 5; n < 8; n++) {
46943 for (size_t k = 1; k <= 5; k += 2) {
46944 GemmMicrokernelTester()
46945 .mr(2)
46946 .nr(4)
46947 .kr(1)
46948 .sr(1)
46949 .m(2)
46950 .n(n)
46951 .k(k)
46952 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080046953 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046954 }
46955 }
46956}
46957
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046958TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046959 for (uint32_t n = 5; n < 8; n++) {
46960 for (size_t k = 1; k <= 5; k += 2) {
46961 for (uint32_t m = 1; m <= 2; m++) {
46962 GemmMicrokernelTester()
46963 .mr(2)
46964 .nr(4)
46965 .kr(1)
46966 .sr(1)
46967 .m(m)
46968 .n(n)
46969 .k(k)
46970 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080046971 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046972 }
46973 }
46974 }
46975}
46976
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046977TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046978 for (uint32_t n = 8; n <= 12; n += 4) {
46979 for (size_t k = 1; k <= 5; k += 2) {
46980 GemmMicrokernelTester()
46981 .mr(2)
46982 .nr(4)
46983 .kr(1)
46984 .sr(1)
46985 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080046986 .n(n)
Marat Dukhan779b2532021-06-29 14:14:13 -070046987 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080046988 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070046989 }
46990 }
46991}
46992
Marat Dukhan2ac722e2022-01-04 01:54:20 -080046993TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
Marat Dukhan779b2532021-06-29 14:14:13 -070046994 for (uint32_t n = 8; n <= 12; n += 4) {
46995 for (size_t k = 1; k <= 5; k += 2) {
46996 GemmMicrokernelTester()
46997 .mr(2)
46998 .nr(4)
46999 .kr(1)
47000 .sr(1)
47001 .m(2)
47002 .n(n)
47003 .k(k)
47004 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080047005 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070047006 }
47007 }
47008}
47009
Marat Dukhan2ac722e2022-01-04 01:54:20 -080047010TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4_strided_a) {
Marat Dukhan779b2532021-06-29 14:14:13 -070047011 for (uint32_t n = 8; n <= 12; n += 4) {
47012 for (size_t k = 1; k <= 5; k += 2) {
47013 GemmMicrokernelTester()
47014 .mr(2)
47015 .nr(4)
47016 .kr(1)
47017 .sr(1)
47018 .m(2)
47019 .n(n)
47020 .k(k)
47021 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080047022 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070047023 }
47024 }
47025}
47026
Marat Dukhan2ac722e2022-01-04 01:54:20 -080047027TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070047028 for (uint32_t n = 8; n <= 12; n += 4) {
47029 for (size_t k = 1; k <= 5; k += 2) {
47030 for (uint32_t m = 1; m <= 2; m++) {
47031 GemmMicrokernelTester()
47032 .mr(2)
47033 .nr(4)
47034 .kr(1)
47035 .sr(1)
47036 .m(m)
47037 .n(n)
47038 .k(k)
47039 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080047040 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070047041 }
47042 }
47043 }
47044}
47045
Marat Dukhan2ac722e2022-01-04 01:54:20 -080047046TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, strided_cm_subtile) {
Marat Dukhan779b2532021-06-29 14:14:13 -070047047 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080047048 for (uint32_t n = 1; n <= 4; n++) {
47049 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan779b2532021-06-29 14:14:13 -070047050 GemmMicrokernelTester()
47051 .mr(2)
47052 .nr(4)
47053 .kr(1)
47054 .sr(1)
47055 .m(m)
47056 .n(n)
47057 .k(k)
47058 .cm_stride(7)
47059 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080047060 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070047061 }
47062 }
47063 }
47064}
47065
Marat Dukhan2ac722e2022-01-04 01:54:20 -080047066TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, qmin) {
Marat Dukhan779b2532021-06-29 14:14:13 -070047067 GemmMicrokernelTester()
47068 .mr(2)
47069 .nr(4)
47070 .kr(1)
47071 .sr(1)
47072 .m(2)
47073 .n(4)
47074 .k(1)
47075 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080047076 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070047077}
47078
Marat Dukhan2ac722e2022-01-04 01:54:20 -080047079TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, qmax) {
Marat Dukhan779b2532021-06-29 14:14:13 -070047080 GemmMicrokernelTester()
47081 .mr(2)
47082 .nr(4)
47083 .kr(1)
47084 .sr(1)
47085 .m(2)
47086 .n(4)
47087 .k(1)
47088 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080047089 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070047090}
47091
Marat Dukhan2ac722e2022-01-04 01:54:20 -080047092TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, strided_cm) {
Marat Dukhan779b2532021-06-29 14:14:13 -070047093 GemmMicrokernelTester()
47094 .mr(2)
47095 .nr(4)
47096 .kr(1)
47097 .sr(1)
47098 .m(2)
47099 .n(4)
47100 .k(1)
47101 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080047102 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan779b2532021-06-29 14:14:13 -070047103}
47104
47105
Marat Dukhan272d4d92022-01-04 15:07:14 -080047106TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1) {
47107 GemmMicrokernelTester()
47108 .mr(3)
47109 .nr(2)
47110 .kr(1)
47111 .sr(1)
47112 .m(3)
47113 .n(2)
47114 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080047115 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047116}
47117
47118TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cn) {
47119 GemmMicrokernelTester()
47120 .mr(3)
47121 .nr(2)
47122 .kr(1)
47123 .sr(1)
47124 .m(3)
47125 .n(2)
47126 .k(1)
47127 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080047128 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047129}
47130
47131TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_strided_a) {
47132 GemmMicrokernelTester()
47133 .mr(3)
47134 .nr(2)
47135 .kr(1)
47136 .sr(1)
47137 .m(3)
47138 .n(2)
47139 .k(1)
47140 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080047141 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047142}
47143
47144TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080047145 for (uint32_t n = 1; n <= 2; n++) {
47146 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080047147 GemmMicrokernelTester()
47148 .mr(3)
47149 .nr(2)
47150 .kr(1)
47151 .sr(1)
47152 .m(m)
47153 .n(n)
47154 .k(1)
47155 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080047156 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047157 }
47158 }
47159}
47160
47161TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
47162 for (uint32_t m = 1; m <= 3; m++) {
47163 GemmMicrokernelTester()
47164 .mr(3)
47165 .nr(2)
47166 .kr(1)
47167 .sr(1)
47168 .m(m)
47169 .n(2)
47170 .k(1)
47171 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080047172 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047173 }
47174}
47175
47176TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
47177 for (uint32_t n = 1; n <= 2; n++) {
47178 GemmMicrokernelTester()
47179 .mr(3)
47180 .nr(2)
47181 .kr(1)
47182 .sr(1)
47183 .m(3)
47184 .n(n)
47185 .k(1)
47186 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080047187 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047188 }
47189}
47190
47191TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1) {
47192 for (size_t k = 2; k < 10; k++) {
47193 GemmMicrokernelTester()
47194 .mr(3)
47195 .nr(2)
47196 .kr(1)
47197 .sr(1)
47198 .m(3)
47199 .n(2)
47200 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080047201 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047202 }
47203}
47204
47205TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1_strided_a) {
47206 for (size_t k = 2; k < 10; k++) {
47207 GemmMicrokernelTester()
47208 .mr(3)
47209 .nr(2)
47210 .kr(1)
47211 .sr(1)
47212 .m(3)
47213 .n(2)
47214 .k(k)
47215 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080047216 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047217 }
47218}
47219
47220TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1_subtile) {
47221 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080047222 for (uint32_t n = 1; n <= 2; n++) {
47223 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080047224 GemmMicrokernelTester()
47225 .mr(3)
47226 .nr(2)
47227 .kr(1)
47228 .sr(1)
47229 .m(m)
47230 .n(n)
47231 .k(k)
47232 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080047233 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047234 }
47235 }
47236 }
47237}
47238
47239TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2) {
47240 for (uint32_t n = 3; n < 4; n++) {
47241 for (size_t k = 1; k <= 5; k += 2) {
47242 GemmMicrokernelTester()
47243 .mr(3)
47244 .nr(2)
47245 .kr(1)
47246 .sr(1)
47247 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080047248 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080047249 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080047250 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047251 }
47252 }
47253}
47254
47255TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
47256 for (uint32_t n = 3; n < 4; n++) {
47257 for (size_t k = 1; k <= 5; k += 2) {
47258 GemmMicrokernelTester()
47259 .mr(3)
47260 .nr(2)
47261 .kr(1)
47262 .sr(1)
47263 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080047264 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080047265 .k(k)
47266 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080047267 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047268 }
47269 }
47270}
47271
47272TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_strided_a) {
47273 for (uint32_t n = 3; n < 4; n++) {
47274 for (size_t k = 1; k <= 5; k += 2) {
47275 GemmMicrokernelTester()
47276 .mr(3)
47277 .nr(2)
47278 .kr(1)
47279 .sr(1)
47280 .m(3)
47281 .n(n)
47282 .k(k)
47283 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080047284 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047285 }
47286 }
47287}
47288
47289TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_subtile) {
47290 for (uint32_t n = 3; n < 4; n++) {
47291 for (size_t k = 1; k <= 5; k += 2) {
47292 for (uint32_t m = 1; m <= 3; m++) {
47293 GemmMicrokernelTester()
47294 .mr(3)
47295 .nr(2)
47296 .kr(1)
47297 .sr(1)
47298 .m(m)
47299 .n(n)
47300 .k(k)
47301 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080047302 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047303 }
47304 }
47305 }
47306}
47307
47308TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2) {
47309 for (uint32_t n = 4; n <= 6; n += 2) {
47310 for (size_t k = 1; k <= 5; k += 2) {
47311 GemmMicrokernelTester()
47312 .mr(3)
47313 .nr(2)
47314 .kr(1)
47315 .sr(1)
47316 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080047317 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080047318 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080047319 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047320 }
47321 }
47322}
47323
47324TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
47325 for (uint32_t n = 4; n <= 6; n += 2) {
47326 for (size_t k = 1; k <= 5; k += 2) {
47327 GemmMicrokernelTester()
47328 .mr(3)
47329 .nr(2)
47330 .kr(1)
47331 .sr(1)
47332 .m(3)
47333 .n(n)
47334 .k(k)
47335 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080047336 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047337 }
47338 }
47339}
47340
47341TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_strided_a) {
47342 for (uint32_t n = 4; n <= 6; n += 2) {
47343 for (size_t k = 1; k <= 5; k += 2) {
47344 GemmMicrokernelTester()
47345 .mr(3)
47346 .nr(2)
47347 .kr(1)
47348 .sr(1)
47349 .m(3)
47350 .n(n)
47351 .k(k)
47352 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080047353 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047354 }
47355 }
47356}
47357
47358TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_subtile) {
47359 for (uint32_t n = 4; n <= 6; n += 2) {
47360 for (size_t k = 1; k <= 5; k += 2) {
47361 for (uint32_t m = 1; m <= 3; m++) {
47362 GemmMicrokernelTester()
47363 .mr(3)
47364 .nr(2)
47365 .kr(1)
47366 .sr(1)
47367 .m(m)
47368 .n(n)
47369 .k(k)
47370 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080047371 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047372 }
47373 }
47374 }
47375}
47376
47377TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm_subtile) {
47378 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080047379 for (uint32_t n = 1; n <= 2; n++) {
47380 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080047381 GemmMicrokernelTester()
47382 .mr(3)
47383 .nr(2)
47384 .kr(1)
47385 .sr(1)
47386 .m(m)
47387 .n(n)
47388 .k(k)
47389 .cm_stride(5)
47390 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080047391 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047392 }
47393 }
47394 }
47395}
47396
47397TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmin) {
47398 GemmMicrokernelTester()
47399 .mr(3)
47400 .nr(2)
47401 .kr(1)
47402 .sr(1)
47403 .m(3)
47404 .n(2)
47405 .k(1)
47406 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080047407 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047408}
47409
47410TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmax) {
47411 GemmMicrokernelTester()
47412 .mr(3)
47413 .nr(2)
47414 .kr(1)
47415 .sr(1)
47416 .m(3)
47417 .n(2)
47418 .k(1)
47419 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080047420 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047421}
47422
47423TEST(QS8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm) {
47424 GemmMicrokernelTester()
47425 .mr(3)
47426 .nr(2)
47427 .kr(1)
47428 .sr(1)
47429 .m(3)
47430 .n(2)
47431 .k(1)
47432 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080047433 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047434}
47435
47436
47437TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1) {
47438 GemmMicrokernelTester()
47439 .mr(4)
47440 .nr(2)
47441 .kr(1)
47442 .sr(1)
47443 .m(4)
47444 .n(2)
47445 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080047446 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047447}
47448
47449TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cn) {
47450 GemmMicrokernelTester()
47451 .mr(4)
47452 .nr(2)
47453 .kr(1)
47454 .sr(1)
47455 .m(4)
47456 .n(2)
47457 .k(1)
47458 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080047459 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047460}
47461
47462TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_strided_a) {
47463 GemmMicrokernelTester()
47464 .mr(4)
47465 .nr(2)
47466 .kr(1)
47467 .sr(1)
47468 .m(4)
47469 .n(2)
47470 .k(1)
47471 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080047472 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047473}
47474
47475TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080047476 for (uint32_t n = 1; n <= 2; n++) {
47477 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080047478 GemmMicrokernelTester()
47479 .mr(4)
47480 .nr(2)
47481 .kr(1)
47482 .sr(1)
47483 .m(m)
47484 .n(n)
47485 .k(1)
47486 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080047487 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047488 }
47489 }
47490}
47491
47492TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
47493 for (uint32_t m = 1; m <= 4; m++) {
47494 GemmMicrokernelTester()
47495 .mr(4)
47496 .nr(2)
47497 .kr(1)
47498 .sr(1)
47499 .m(m)
47500 .n(2)
47501 .k(1)
47502 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080047503 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047504 }
47505}
47506
47507TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
47508 for (uint32_t n = 1; n <= 2; n++) {
47509 GemmMicrokernelTester()
47510 .mr(4)
47511 .nr(2)
47512 .kr(1)
47513 .sr(1)
47514 .m(4)
47515 .n(n)
47516 .k(1)
47517 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080047518 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047519 }
47520}
47521
47522TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1) {
47523 for (size_t k = 2; k < 10; k++) {
47524 GemmMicrokernelTester()
47525 .mr(4)
47526 .nr(2)
47527 .kr(1)
47528 .sr(1)
47529 .m(4)
47530 .n(2)
47531 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080047532 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047533 }
47534}
47535
47536TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1_strided_a) {
47537 for (size_t k = 2; k < 10; k++) {
47538 GemmMicrokernelTester()
47539 .mr(4)
47540 .nr(2)
47541 .kr(1)
47542 .sr(1)
47543 .m(4)
47544 .n(2)
47545 .k(k)
47546 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080047547 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047548 }
47549}
47550
47551TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1_subtile) {
47552 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080047553 for (uint32_t n = 1; n <= 2; n++) {
47554 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080047555 GemmMicrokernelTester()
47556 .mr(4)
47557 .nr(2)
47558 .kr(1)
47559 .sr(1)
47560 .m(m)
47561 .n(n)
47562 .k(k)
47563 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080047564 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047565 }
47566 }
47567 }
47568}
47569
47570TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2) {
47571 for (uint32_t n = 3; n < 4; n++) {
47572 for (size_t k = 1; k <= 5; k += 2) {
47573 GemmMicrokernelTester()
47574 .mr(4)
47575 .nr(2)
47576 .kr(1)
47577 .sr(1)
47578 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080047579 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080047580 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080047581 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047582 }
47583 }
47584}
47585
47586TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
47587 for (uint32_t n = 3; n < 4; n++) {
47588 for (size_t k = 1; k <= 5; k += 2) {
47589 GemmMicrokernelTester()
47590 .mr(4)
47591 .nr(2)
47592 .kr(1)
47593 .sr(1)
47594 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080047595 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080047596 .k(k)
47597 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080047598 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047599 }
47600 }
47601}
47602
47603TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_strided_a) {
47604 for (uint32_t n = 3; n < 4; n++) {
47605 for (size_t k = 1; k <= 5; k += 2) {
47606 GemmMicrokernelTester()
47607 .mr(4)
47608 .nr(2)
47609 .kr(1)
47610 .sr(1)
47611 .m(4)
47612 .n(n)
47613 .k(k)
47614 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080047615 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047616 }
47617 }
47618}
47619
47620TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_subtile) {
47621 for (uint32_t n = 3; n < 4; n++) {
47622 for (size_t k = 1; k <= 5; k += 2) {
47623 for (uint32_t m = 1; m <= 4; m++) {
47624 GemmMicrokernelTester()
47625 .mr(4)
47626 .nr(2)
47627 .kr(1)
47628 .sr(1)
47629 .m(m)
47630 .n(n)
47631 .k(k)
47632 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080047633 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047634 }
47635 }
47636 }
47637}
47638
47639TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2) {
47640 for (uint32_t n = 4; n <= 6; n += 2) {
47641 for (size_t k = 1; k <= 5; k += 2) {
47642 GemmMicrokernelTester()
47643 .mr(4)
47644 .nr(2)
47645 .kr(1)
47646 .sr(1)
47647 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080047648 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080047649 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080047650 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047651 }
47652 }
47653}
47654
47655TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
47656 for (uint32_t n = 4; n <= 6; n += 2) {
47657 for (size_t k = 1; k <= 5; k += 2) {
47658 GemmMicrokernelTester()
47659 .mr(4)
47660 .nr(2)
47661 .kr(1)
47662 .sr(1)
47663 .m(4)
47664 .n(n)
47665 .k(k)
47666 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080047667 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047668 }
47669 }
47670}
47671
47672TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_strided_a) {
47673 for (uint32_t n = 4; n <= 6; n += 2) {
47674 for (size_t k = 1; k <= 5; k += 2) {
47675 GemmMicrokernelTester()
47676 .mr(4)
47677 .nr(2)
47678 .kr(1)
47679 .sr(1)
47680 .m(4)
47681 .n(n)
47682 .k(k)
47683 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080047684 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047685 }
47686 }
47687}
47688
47689TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_subtile) {
47690 for (uint32_t n = 4; n <= 6; n += 2) {
47691 for (size_t k = 1; k <= 5; k += 2) {
47692 for (uint32_t m = 1; m <= 4; m++) {
47693 GemmMicrokernelTester()
47694 .mr(4)
47695 .nr(2)
47696 .kr(1)
47697 .sr(1)
47698 .m(m)
47699 .n(n)
47700 .k(k)
47701 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080047702 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047703 }
47704 }
47705 }
47706}
47707
47708TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm_subtile) {
47709 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080047710 for (uint32_t n = 1; n <= 2; n++) {
47711 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080047712 GemmMicrokernelTester()
47713 .mr(4)
47714 .nr(2)
47715 .kr(1)
47716 .sr(1)
47717 .m(m)
47718 .n(n)
47719 .k(k)
47720 .cm_stride(5)
47721 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080047722 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047723 }
47724 }
47725 }
47726}
47727
47728TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmin) {
47729 GemmMicrokernelTester()
47730 .mr(4)
47731 .nr(2)
47732 .kr(1)
47733 .sr(1)
47734 .m(4)
47735 .n(2)
47736 .k(1)
47737 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080047738 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047739}
47740
47741TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmax) {
47742 GemmMicrokernelTester()
47743 .mr(4)
47744 .nr(2)
47745 .kr(1)
47746 .sr(1)
47747 .m(4)
47748 .n(2)
47749 .k(1)
47750 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080047751 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047752}
47753
47754TEST(QS8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm) {
47755 GemmMicrokernelTester()
47756 .mr(4)
47757 .nr(2)
47758 .kr(1)
47759 .sr(1)
47760 .m(4)
47761 .n(2)
47762 .k(1)
47763 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080047764 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047765}
47766
47767
Marat Dukhan272d4d92022-01-04 15:07:14 -080047768TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1) {
47769 GemmMicrokernelTester()
47770 .mr(3)
47771 .nr(4)
47772 .kr(1)
47773 .sr(1)
47774 .m(3)
47775 .n(4)
47776 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080047777 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047778}
47779
47780TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cn) {
47781 GemmMicrokernelTester()
47782 .mr(3)
47783 .nr(4)
47784 .kr(1)
47785 .sr(1)
47786 .m(3)
47787 .n(4)
47788 .k(1)
47789 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080047790 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047791}
47792
47793TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_strided_a) {
47794 GemmMicrokernelTester()
47795 .mr(3)
47796 .nr(4)
47797 .kr(1)
47798 .sr(1)
47799 .m(3)
47800 .n(4)
47801 .k(1)
47802 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080047803 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047804}
47805
47806TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080047807 for (uint32_t n = 1; n <= 4; n++) {
47808 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080047809 GemmMicrokernelTester()
47810 .mr(3)
47811 .nr(4)
47812 .kr(1)
47813 .sr(1)
47814 .m(m)
47815 .n(n)
47816 .k(1)
47817 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080047818 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047819 }
47820 }
47821}
47822
47823TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
47824 for (uint32_t m = 1; m <= 3; m++) {
47825 GemmMicrokernelTester()
47826 .mr(3)
47827 .nr(4)
47828 .kr(1)
47829 .sr(1)
47830 .m(m)
47831 .n(4)
47832 .k(1)
47833 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080047834 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047835 }
47836}
47837
47838TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
47839 for (uint32_t n = 1; n <= 4; n++) {
47840 GemmMicrokernelTester()
47841 .mr(3)
47842 .nr(4)
47843 .kr(1)
47844 .sr(1)
47845 .m(3)
47846 .n(n)
47847 .k(1)
47848 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080047849 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047850 }
47851}
47852
47853TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1) {
47854 for (size_t k = 2; k < 10; k++) {
47855 GemmMicrokernelTester()
47856 .mr(3)
47857 .nr(4)
47858 .kr(1)
47859 .sr(1)
47860 .m(3)
47861 .n(4)
47862 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080047863 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047864 }
47865}
47866
47867TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1_strided_a) {
47868 for (size_t k = 2; k < 10; k++) {
47869 GemmMicrokernelTester()
47870 .mr(3)
47871 .nr(4)
47872 .kr(1)
47873 .sr(1)
47874 .m(3)
47875 .n(4)
47876 .k(k)
47877 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080047878 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047879 }
47880}
47881
47882TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1_subtile) {
47883 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080047884 for (uint32_t n = 1; n <= 4; n++) {
47885 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080047886 GemmMicrokernelTester()
47887 .mr(3)
47888 .nr(4)
47889 .kr(1)
47890 .sr(1)
47891 .m(m)
47892 .n(n)
47893 .k(k)
47894 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080047895 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047896 }
47897 }
47898 }
47899}
47900
47901TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4) {
47902 for (uint32_t n = 5; n < 8; n++) {
47903 for (size_t k = 1; k <= 5; k += 2) {
47904 GemmMicrokernelTester()
47905 .mr(3)
47906 .nr(4)
47907 .kr(1)
47908 .sr(1)
47909 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080047910 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080047911 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080047912 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047913 }
47914 }
47915}
47916
47917TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
47918 for (uint32_t n = 5; n < 8; n++) {
47919 for (size_t k = 1; k <= 5; k += 2) {
47920 GemmMicrokernelTester()
47921 .mr(3)
47922 .nr(4)
47923 .kr(1)
47924 .sr(1)
47925 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080047926 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080047927 .k(k)
47928 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080047929 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047930 }
47931 }
47932}
47933
47934TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_strided_a) {
47935 for (uint32_t n = 5; n < 8; n++) {
47936 for (size_t k = 1; k <= 5; k += 2) {
47937 GemmMicrokernelTester()
47938 .mr(3)
47939 .nr(4)
47940 .kr(1)
47941 .sr(1)
47942 .m(3)
47943 .n(n)
47944 .k(k)
47945 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080047946 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047947 }
47948 }
47949}
47950
47951TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_subtile) {
47952 for (uint32_t n = 5; n < 8; n++) {
47953 for (size_t k = 1; k <= 5; k += 2) {
47954 for (uint32_t m = 1; m <= 3; m++) {
47955 GemmMicrokernelTester()
47956 .mr(3)
47957 .nr(4)
47958 .kr(1)
47959 .sr(1)
47960 .m(m)
47961 .n(n)
47962 .k(k)
47963 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080047964 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047965 }
47966 }
47967 }
47968}
47969
47970TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4) {
47971 for (uint32_t n = 8; n <= 12; n += 4) {
47972 for (size_t k = 1; k <= 5; k += 2) {
47973 GemmMicrokernelTester()
47974 .mr(3)
47975 .nr(4)
47976 .kr(1)
47977 .sr(1)
47978 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080047979 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080047980 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080047981 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047982 }
47983 }
47984}
47985
47986TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
47987 for (uint32_t n = 8; n <= 12; n += 4) {
47988 for (size_t k = 1; k <= 5; k += 2) {
47989 GemmMicrokernelTester()
47990 .mr(3)
47991 .nr(4)
47992 .kr(1)
47993 .sr(1)
47994 .m(3)
47995 .n(n)
47996 .k(k)
47997 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080047998 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080047999 }
48000 }
48001}
48002
48003TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_strided_a) {
48004 for (uint32_t n = 8; n <= 12; n += 4) {
48005 for (size_t k = 1; k <= 5; k += 2) {
48006 GemmMicrokernelTester()
48007 .mr(3)
48008 .nr(4)
48009 .kr(1)
48010 .sr(1)
48011 .m(3)
48012 .n(n)
48013 .k(k)
48014 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080048015 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048016 }
48017 }
48018}
48019
48020TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_subtile) {
48021 for (uint32_t n = 8; n <= 12; n += 4) {
48022 for (size_t k = 1; k <= 5; k += 2) {
48023 for (uint32_t m = 1; m <= 3; m++) {
48024 GemmMicrokernelTester()
48025 .mr(3)
48026 .nr(4)
48027 .kr(1)
48028 .sr(1)
48029 .m(m)
48030 .n(n)
48031 .k(k)
48032 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080048033 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048034 }
48035 }
48036 }
48037}
48038
48039TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm_subtile) {
48040 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080048041 for (uint32_t n = 1; n <= 4; n++) {
48042 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080048043 GemmMicrokernelTester()
48044 .mr(3)
48045 .nr(4)
48046 .kr(1)
48047 .sr(1)
48048 .m(m)
48049 .n(n)
48050 .k(k)
48051 .cm_stride(7)
48052 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080048053 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048054 }
48055 }
48056 }
48057}
48058
48059TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmin) {
48060 GemmMicrokernelTester()
48061 .mr(3)
48062 .nr(4)
48063 .kr(1)
48064 .sr(1)
48065 .m(3)
48066 .n(4)
48067 .k(1)
48068 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080048069 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048070}
48071
48072TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmax) {
48073 GemmMicrokernelTester()
48074 .mr(3)
48075 .nr(4)
48076 .kr(1)
48077 .sr(1)
48078 .m(3)
48079 .n(4)
48080 .k(1)
48081 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080048082 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048083}
48084
48085TEST(QS8_GEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm) {
48086 GemmMicrokernelTester()
48087 .mr(3)
48088 .nr(4)
48089 .kr(1)
48090 .sr(1)
48091 .m(3)
48092 .n(4)
48093 .k(1)
48094 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080048095 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048096}
48097
48098
48099TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1) {
48100 GemmMicrokernelTester()
48101 .mr(4)
48102 .nr(4)
48103 .kr(1)
48104 .sr(1)
48105 .m(4)
48106 .n(4)
48107 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080048108 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048109}
48110
48111TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cn) {
48112 GemmMicrokernelTester()
48113 .mr(4)
48114 .nr(4)
48115 .kr(1)
48116 .sr(1)
48117 .m(4)
48118 .n(4)
48119 .k(1)
48120 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080048121 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048122}
48123
48124TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_strided_a) {
48125 GemmMicrokernelTester()
48126 .mr(4)
48127 .nr(4)
48128 .kr(1)
48129 .sr(1)
48130 .m(4)
48131 .n(4)
48132 .k(1)
48133 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080048134 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048135}
48136
48137TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080048138 for (uint32_t n = 1; n <= 4; n++) {
48139 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080048140 GemmMicrokernelTester()
48141 .mr(4)
48142 .nr(4)
48143 .kr(1)
48144 .sr(1)
48145 .m(m)
48146 .n(n)
48147 .k(1)
48148 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080048149 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048150 }
48151 }
48152}
48153
48154TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
48155 for (uint32_t m = 1; m <= 4; m++) {
48156 GemmMicrokernelTester()
48157 .mr(4)
48158 .nr(4)
48159 .kr(1)
48160 .sr(1)
48161 .m(m)
48162 .n(4)
48163 .k(1)
48164 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080048165 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048166 }
48167}
48168
48169TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
48170 for (uint32_t n = 1; n <= 4; n++) {
48171 GemmMicrokernelTester()
48172 .mr(4)
48173 .nr(4)
48174 .kr(1)
48175 .sr(1)
48176 .m(4)
48177 .n(n)
48178 .k(1)
48179 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080048180 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048181 }
48182}
48183
48184TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1) {
48185 for (size_t k = 2; k < 10; k++) {
48186 GemmMicrokernelTester()
48187 .mr(4)
48188 .nr(4)
48189 .kr(1)
48190 .sr(1)
48191 .m(4)
48192 .n(4)
48193 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080048194 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048195 }
48196}
48197
48198TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1_strided_a) {
48199 for (size_t k = 2; k < 10; k++) {
48200 GemmMicrokernelTester()
48201 .mr(4)
48202 .nr(4)
48203 .kr(1)
48204 .sr(1)
48205 .m(4)
48206 .n(4)
48207 .k(k)
48208 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080048209 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048210 }
48211}
48212
48213TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1_subtile) {
48214 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080048215 for (uint32_t n = 1; n <= 4; n++) {
48216 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080048217 GemmMicrokernelTester()
48218 .mr(4)
48219 .nr(4)
48220 .kr(1)
48221 .sr(1)
48222 .m(m)
48223 .n(n)
48224 .k(k)
48225 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080048226 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048227 }
48228 }
48229 }
48230}
48231
48232TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4) {
48233 for (uint32_t n = 5; n < 8; n++) {
48234 for (size_t k = 1; k <= 5; k += 2) {
48235 GemmMicrokernelTester()
48236 .mr(4)
48237 .nr(4)
48238 .kr(1)
48239 .sr(1)
48240 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080048241 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080048242 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080048243 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048244 }
48245 }
48246}
48247
48248TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
48249 for (uint32_t n = 5; n < 8; n++) {
48250 for (size_t k = 1; k <= 5; k += 2) {
48251 GemmMicrokernelTester()
48252 .mr(4)
48253 .nr(4)
48254 .kr(1)
48255 .sr(1)
48256 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080048257 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080048258 .k(k)
48259 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080048260 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048261 }
48262 }
48263}
48264
48265TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_strided_a) {
48266 for (uint32_t n = 5; n < 8; n++) {
48267 for (size_t k = 1; k <= 5; k += 2) {
48268 GemmMicrokernelTester()
48269 .mr(4)
48270 .nr(4)
48271 .kr(1)
48272 .sr(1)
48273 .m(4)
48274 .n(n)
48275 .k(k)
48276 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080048277 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048278 }
48279 }
48280}
48281
48282TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_subtile) {
48283 for (uint32_t n = 5; n < 8; n++) {
48284 for (size_t k = 1; k <= 5; k += 2) {
48285 for (uint32_t m = 1; m <= 4; m++) {
48286 GemmMicrokernelTester()
48287 .mr(4)
48288 .nr(4)
48289 .kr(1)
48290 .sr(1)
48291 .m(m)
48292 .n(n)
48293 .k(k)
48294 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080048295 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048296 }
48297 }
48298 }
48299}
48300
48301TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4) {
48302 for (uint32_t n = 8; n <= 12; n += 4) {
48303 for (size_t k = 1; k <= 5; k += 2) {
48304 GemmMicrokernelTester()
48305 .mr(4)
48306 .nr(4)
48307 .kr(1)
48308 .sr(1)
48309 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080048310 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080048311 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080048312 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048313 }
48314 }
48315}
48316
48317TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
48318 for (uint32_t n = 8; n <= 12; n += 4) {
48319 for (size_t k = 1; k <= 5; k += 2) {
48320 GemmMicrokernelTester()
48321 .mr(4)
48322 .nr(4)
48323 .kr(1)
48324 .sr(1)
48325 .m(4)
48326 .n(n)
48327 .k(k)
48328 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080048329 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048330 }
48331 }
48332}
48333
48334TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_strided_a) {
48335 for (uint32_t n = 8; n <= 12; n += 4) {
48336 for (size_t k = 1; k <= 5; k += 2) {
48337 GemmMicrokernelTester()
48338 .mr(4)
48339 .nr(4)
48340 .kr(1)
48341 .sr(1)
48342 .m(4)
48343 .n(n)
48344 .k(k)
48345 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080048346 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048347 }
48348 }
48349}
48350
48351TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_subtile) {
48352 for (uint32_t n = 8; n <= 12; n += 4) {
48353 for (size_t k = 1; k <= 5; k += 2) {
48354 for (uint32_t m = 1; m <= 4; m++) {
48355 GemmMicrokernelTester()
48356 .mr(4)
48357 .nr(4)
48358 .kr(1)
48359 .sr(1)
48360 .m(m)
48361 .n(n)
48362 .k(k)
48363 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080048364 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048365 }
48366 }
48367 }
48368}
48369
48370TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm_subtile) {
48371 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080048372 for (uint32_t n = 1; n <= 4; n++) {
48373 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080048374 GemmMicrokernelTester()
48375 .mr(4)
48376 .nr(4)
48377 .kr(1)
48378 .sr(1)
48379 .m(m)
48380 .n(n)
48381 .k(k)
48382 .cm_stride(7)
48383 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080048384 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048385 }
48386 }
48387 }
48388}
48389
48390TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmin) {
48391 GemmMicrokernelTester()
48392 .mr(4)
48393 .nr(4)
48394 .kr(1)
48395 .sr(1)
48396 .m(4)
48397 .n(4)
48398 .k(1)
48399 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080048400 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048401}
48402
48403TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmax) {
48404 GemmMicrokernelTester()
48405 .mr(4)
48406 .nr(4)
48407 .kr(1)
48408 .sr(1)
48409 .m(4)
48410 .n(4)
48411 .k(1)
48412 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080048413 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048414}
48415
48416TEST(QS8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm) {
48417 GemmMicrokernelTester()
48418 .mr(4)
48419 .nr(4)
48420 .kr(1)
48421 .sr(1)
48422 .m(4)
48423 .n(4)
48424 .k(1)
48425 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080048426 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048427}
48428
48429
48430TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1) {
48431 GemmMicrokernelTester()
48432 .mr(1)
48433 .nr(2)
48434 .kr(1)
48435 .sr(1)
48436 .m(1)
48437 .n(2)
48438 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080048439 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048440}
48441
48442TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cn) {
48443 GemmMicrokernelTester()
48444 .mr(1)
48445 .nr(2)
48446 .kr(1)
48447 .sr(1)
48448 .m(1)
48449 .n(2)
48450 .k(1)
48451 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080048452 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048453}
48454
48455TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_strided_a) {
48456 GemmMicrokernelTester()
48457 .mr(1)
48458 .nr(2)
48459 .kr(1)
48460 .sr(1)
48461 .m(1)
48462 .n(2)
48463 .k(1)
48464 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080048465 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048466}
48467
48468TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080048469 for (uint32_t n = 1; n <= 2; n++) {
48470 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080048471 GemmMicrokernelTester()
48472 .mr(1)
48473 .nr(2)
48474 .kr(1)
48475 .sr(1)
48476 .m(m)
48477 .n(n)
48478 .k(1)
48479 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080048480 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048481 }
48482 }
48483}
48484
48485TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
48486 for (uint32_t m = 1; m <= 1; m++) {
48487 GemmMicrokernelTester()
48488 .mr(1)
48489 .nr(2)
48490 .kr(1)
48491 .sr(1)
48492 .m(m)
48493 .n(2)
48494 .k(1)
48495 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080048496 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048497 }
48498}
48499
48500TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
48501 for (uint32_t n = 1; n <= 2; n++) {
48502 GemmMicrokernelTester()
48503 .mr(1)
48504 .nr(2)
48505 .kr(1)
48506 .sr(1)
48507 .m(1)
48508 .n(n)
48509 .k(1)
48510 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080048511 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048512 }
48513}
48514
48515TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1) {
48516 for (size_t k = 2; k < 10; k++) {
48517 GemmMicrokernelTester()
48518 .mr(1)
48519 .nr(2)
48520 .kr(1)
48521 .sr(1)
48522 .m(1)
48523 .n(2)
48524 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080048525 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048526 }
48527}
48528
48529TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_strided_a) {
48530 for (size_t k = 2; k < 10; k++) {
48531 GemmMicrokernelTester()
48532 .mr(1)
48533 .nr(2)
48534 .kr(1)
48535 .sr(1)
48536 .m(1)
48537 .n(2)
48538 .k(k)
48539 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080048540 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048541 }
48542}
48543
48544TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_subtile) {
48545 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080048546 for (uint32_t n = 1; n <= 2; n++) {
48547 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080048548 GemmMicrokernelTester()
48549 .mr(1)
48550 .nr(2)
48551 .kr(1)
48552 .sr(1)
48553 .m(m)
48554 .n(n)
48555 .k(k)
48556 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080048557 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048558 }
48559 }
48560 }
48561}
48562
48563TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2) {
48564 for (uint32_t n = 3; n < 4; n++) {
48565 for (size_t k = 1; k <= 5; k += 2) {
48566 GemmMicrokernelTester()
48567 .mr(1)
48568 .nr(2)
48569 .kr(1)
48570 .sr(1)
48571 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080048572 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080048573 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080048574 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048575 }
48576 }
48577}
48578
48579TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
48580 for (uint32_t n = 3; n < 4; n++) {
48581 for (size_t k = 1; k <= 5; k += 2) {
48582 GemmMicrokernelTester()
48583 .mr(1)
48584 .nr(2)
48585 .kr(1)
48586 .sr(1)
48587 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080048588 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080048589 .k(k)
48590 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080048591 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048592 }
48593 }
48594}
48595
48596TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_a) {
48597 for (uint32_t n = 3; n < 4; n++) {
48598 for (size_t k = 1; k <= 5; k += 2) {
48599 GemmMicrokernelTester()
48600 .mr(1)
48601 .nr(2)
48602 .kr(1)
48603 .sr(1)
48604 .m(1)
48605 .n(n)
48606 .k(k)
48607 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080048608 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048609 }
48610 }
48611}
48612
48613TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_subtile) {
48614 for (uint32_t n = 3; n < 4; n++) {
48615 for (size_t k = 1; k <= 5; k += 2) {
48616 for (uint32_t m = 1; m <= 1; m++) {
48617 GemmMicrokernelTester()
48618 .mr(1)
48619 .nr(2)
48620 .kr(1)
48621 .sr(1)
48622 .m(m)
48623 .n(n)
48624 .k(k)
48625 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080048626 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048627 }
48628 }
48629 }
48630}
48631
48632TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2) {
48633 for (uint32_t n = 4; n <= 6; n += 2) {
48634 for (size_t k = 1; k <= 5; k += 2) {
48635 GemmMicrokernelTester()
48636 .mr(1)
48637 .nr(2)
48638 .kr(1)
48639 .sr(1)
48640 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080048641 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080048642 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080048643 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048644 }
48645 }
48646}
48647
48648TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_cn) {
48649 for (uint32_t n = 4; n <= 6; n += 2) {
48650 for (size_t k = 1; k <= 5; k += 2) {
48651 GemmMicrokernelTester()
48652 .mr(1)
48653 .nr(2)
48654 .kr(1)
48655 .sr(1)
48656 .m(1)
48657 .n(n)
48658 .k(k)
48659 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080048660 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048661 }
48662 }
48663}
48664
48665TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_a) {
48666 for (uint32_t n = 4; n <= 6; n += 2) {
48667 for (size_t k = 1; k <= 5; k += 2) {
48668 GemmMicrokernelTester()
48669 .mr(1)
48670 .nr(2)
48671 .kr(1)
48672 .sr(1)
48673 .m(1)
48674 .n(n)
48675 .k(k)
48676 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080048677 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048678 }
48679 }
48680}
48681
48682TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_subtile) {
48683 for (uint32_t n = 4; n <= 6; n += 2) {
48684 for (size_t k = 1; k <= 5; k += 2) {
48685 for (uint32_t m = 1; m <= 1; m++) {
48686 GemmMicrokernelTester()
48687 .mr(1)
48688 .nr(2)
48689 .kr(1)
48690 .sr(1)
48691 .m(m)
48692 .n(n)
48693 .k(k)
48694 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080048695 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048696 }
48697 }
48698 }
48699}
48700
48701TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm_subtile) {
48702 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080048703 for (uint32_t n = 1; n <= 2; n++) {
48704 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080048705 GemmMicrokernelTester()
48706 .mr(1)
48707 .nr(2)
48708 .kr(1)
48709 .sr(1)
48710 .m(m)
48711 .n(n)
48712 .k(k)
48713 .cm_stride(5)
48714 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080048715 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048716 }
48717 }
48718 }
48719}
48720
48721TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmin) {
48722 GemmMicrokernelTester()
48723 .mr(1)
48724 .nr(2)
48725 .kr(1)
48726 .sr(1)
48727 .m(1)
48728 .n(2)
48729 .k(1)
48730 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080048731 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048732}
48733
48734TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmax) {
48735 GemmMicrokernelTester()
48736 .mr(1)
48737 .nr(2)
48738 .kr(1)
48739 .sr(1)
48740 .m(1)
48741 .n(2)
48742 .k(1)
48743 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080048744 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048745}
48746
48747TEST(QS8_GEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm) {
48748 GemmMicrokernelTester()
48749 .mr(1)
48750 .nr(2)
48751 .kr(1)
48752 .sr(1)
48753 .m(1)
48754 .n(2)
48755 .k(1)
48756 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080048757 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048758}
48759
48760
48761TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1) {
48762 GemmMicrokernelTester()
48763 .mr(2)
48764 .nr(2)
48765 .kr(1)
48766 .sr(1)
48767 .m(2)
48768 .n(2)
48769 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080048770 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048771}
48772
48773TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cn) {
48774 GemmMicrokernelTester()
48775 .mr(2)
48776 .nr(2)
48777 .kr(1)
48778 .sr(1)
48779 .m(2)
48780 .n(2)
48781 .k(1)
48782 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080048783 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048784}
48785
48786TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_strided_a) {
48787 GemmMicrokernelTester()
48788 .mr(2)
48789 .nr(2)
48790 .kr(1)
48791 .sr(1)
48792 .m(2)
48793 .n(2)
48794 .k(1)
48795 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080048796 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048797}
48798
48799TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080048800 for (uint32_t n = 1; n <= 2; n++) {
48801 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080048802 GemmMicrokernelTester()
48803 .mr(2)
48804 .nr(2)
48805 .kr(1)
48806 .sr(1)
48807 .m(m)
48808 .n(n)
48809 .k(1)
48810 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080048811 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048812 }
48813 }
48814}
48815
48816TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
48817 for (uint32_t m = 1; m <= 2; m++) {
48818 GemmMicrokernelTester()
48819 .mr(2)
48820 .nr(2)
48821 .kr(1)
48822 .sr(1)
48823 .m(m)
48824 .n(2)
48825 .k(1)
48826 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080048827 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048828 }
48829}
48830
48831TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
48832 for (uint32_t n = 1; n <= 2; n++) {
48833 GemmMicrokernelTester()
48834 .mr(2)
48835 .nr(2)
48836 .kr(1)
48837 .sr(1)
48838 .m(2)
48839 .n(n)
48840 .k(1)
48841 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080048842 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048843 }
48844}
48845
48846TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1) {
48847 for (size_t k = 2; k < 10; k++) {
48848 GemmMicrokernelTester()
48849 .mr(2)
48850 .nr(2)
48851 .kr(1)
48852 .sr(1)
48853 .m(2)
48854 .n(2)
48855 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080048856 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048857 }
48858}
48859
48860TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_strided_a) {
48861 for (size_t k = 2; k < 10; k++) {
48862 GemmMicrokernelTester()
48863 .mr(2)
48864 .nr(2)
48865 .kr(1)
48866 .sr(1)
48867 .m(2)
48868 .n(2)
48869 .k(k)
48870 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080048871 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048872 }
48873}
48874
48875TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_subtile) {
48876 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080048877 for (uint32_t n = 1; n <= 2; n++) {
48878 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080048879 GemmMicrokernelTester()
48880 .mr(2)
48881 .nr(2)
48882 .kr(1)
48883 .sr(1)
48884 .m(m)
48885 .n(n)
48886 .k(k)
48887 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080048888 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048889 }
48890 }
48891 }
48892}
48893
48894TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2) {
48895 for (uint32_t n = 3; n < 4; n++) {
48896 for (size_t k = 1; k <= 5; k += 2) {
48897 GemmMicrokernelTester()
48898 .mr(2)
48899 .nr(2)
48900 .kr(1)
48901 .sr(1)
48902 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080048903 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080048904 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080048905 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048906 }
48907 }
48908}
48909
48910TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
48911 for (uint32_t n = 3; n < 4; n++) {
48912 for (size_t k = 1; k <= 5; k += 2) {
48913 GemmMicrokernelTester()
48914 .mr(2)
48915 .nr(2)
48916 .kr(1)
48917 .sr(1)
48918 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080048919 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080048920 .k(k)
48921 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080048922 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048923 }
48924 }
48925}
48926
48927TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_a) {
48928 for (uint32_t n = 3; n < 4; n++) {
48929 for (size_t k = 1; k <= 5; k += 2) {
48930 GemmMicrokernelTester()
48931 .mr(2)
48932 .nr(2)
48933 .kr(1)
48934 .sr(1)
48935 .m(2)
48936 .n(n)
48937 .k(k)
48938 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080048939 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048940 }
48941 }
48942}
48943
48944TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_subtile) {
48945 for (uint32_t n = 3; n < 4; n++) {
48946 for (size_t k = 1; k <= 5; k += 2) {
48947 for (uint32_t m = 1; m <= 2; m++) {
48948 GemmMicrokernelTester()
48949 .mr(2)
48950 .nr(2)
48951 .kr(1)
48952 .sr(1)
48953 .m(m)
48954 .n(n)
48955 .k(k)
48956 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080048957 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048958 }
48959 }
48960 }
48961}
48962
48963TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2) {
48964 for (uint32_t n = 4; n <= 6; n += 2) {
48965 for (size_t k = 1; k <= 5; k += 2) {
48966 GemmMicrokernelTester()
48967 .mr(2)
48968 .nr(2)
48969 .kr(1)
48970 .sr(1)
48971 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080048972 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080048973 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080048974 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048975 }
48976 }
48977}
48978
48979TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_cn) {
48980 for (uint32_t n = 4; n <= 6; n += 2) {
48981 for (size_t k = 1; k <= 5; k += 2) {
48982 GemmMicrokernelTester()
48983 .mr(2)
48984 .nr(2)
48985 .kr(1)
48986 .sr(1)
48987 .m(2)
48988 .n(n)
48989 .k(k)
48990 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080048991 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080048992 }
48993 }
48994}
48995
48996TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_a) {
48997 for (uint32_t n = 4; n <= 6; n += 2) {
48998 for (size_t k = 1; k <= 5; k += 2) {
48999 GemmMicrokernelTester()
49000 .mr(2)
49001 .nr(2)
49002 .kr(1)
49003 .sr(1)
49004 .m(2)
49005 .n(n)
49006 .k(k)
49007 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080049008 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049009 }
49010 }
49011}
49012
49013TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_subtile) {
49014 for (uint32_t n = 4; n <= 6; n += 2) {
49015 for (size_t k = 1; k <= 5; k += 2) {
49016 for (uint32_t m = 1; m <= 2; m++) {
49017 GemmMicrokernelTester()
49018 .mr(2)
49019 .nr(2)
49020 .kr(1)
49021 .sr(1)
49022 .m(m)
49023 .n(n)
49024 .k(k)
49025 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080049026 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049027 }
49028 }
49029 }
49030}
49031
49032TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm_subtile) {
49033 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080049034 for (uint32_t n = 1; n <= 2; n++) {
49035 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080049036 GemmMicrokernelTester()
49037 .mr(2)
49038 .nr(2)
49039 .kr(1)
49040 .sr(1)
49041 .m(m)
49042 .n(n)
49043 .k(k)
49044 .cm_stride(5)
49045 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080049046 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049047 }
49048 }
49049 }
49050}
49051
49052TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmin) {
49053 GemmMicrokernelTester()
49054 .mr(2)
49055 .nr(2)
49056 .kr(1)
49057 .sr(1)
49058 .m(2)
49059 .n(2)
49060 .k(1)
49061 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080049062 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049063}
49064
49065TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmax) {
49066 GemmMicrokernelTester()
49067 .mr(2)
49068 .nr(2)
49069 .kr(1)
49070 .sr(1)
49071 .m(2)
49072 .n(2)
49073 .k(1)
49074 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080049075 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049076}
49077
49078TEST(QS8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm) {
49079 GemmMicrokernelTester()
49080 .mr(2)
49081 .nr(2)
49082 .kr(1)
49083 .sr(1)
49084 .m(2)
49085 .n(2)
49086 .k(1)
49087 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080049088 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049089}
49090
49091
Marat Dukhan272d4d92022-01-04 15:07:14 -080049092TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1) {
49093 GemmMicrokernelTester()
49094 .mr(1)
49095 .nr(4)
49096 .kr(1)
49097 .sr(1)
49098 .m(1)
49099 .n(4)
49100 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080049101 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049102}
49103
49104TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cn) {
49105 GemmMicrokernelTester()
49106 .mr(1)
49107 .nr(4)
49108 .kr(1)
49109 .sr(1)
49110 .m(1)
49111 .n(4)
49112 .k(1)
49113 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080049114 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049115}
49116
49117TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_strided_a) {
49118 GemmMicrokernelTester()
49119 .mr(1)
49120 .nr(4)
49121 .kr(1)
49122 .sr(1)
49123 .m(1)
49124 .n(4)
49125 .k(1)
49126 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080049127 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049128}
49129
49130TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080049131 for (uint32_t n = 1; n <= 4; n++) {
49132 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080049133 GemmMicrokernelTester()
49134 .mr(1)
49135 .nr(4)
49136 .kr(1)
49137 .sr(1)
49138 .m(m)
49139 .n(n)
49140 .k(1)
49141 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080049142 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049143 }
49144 }
49145}
49146
49147TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
49148 for (uint32_t m = 1; m <= 1; m++) {
49149 GemmMicrokernelTester()
49150 .mr(1)
49151 .nr(4)
49152 .kr(1)
49153 .sr(1)
49154 .m(m)
49155 .n(4)
49156 .k(1)
49157 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080049158 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049159 }
49160}
49161
49162TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
49163 for (uint32_t n = 1; n <= 4; n++) {
49164 GemmMicrokernelTester()
49165 .mr(1)
49166 .nr(4)
49167 .kr(1)
49168 .sr(1)
49169 .m(1)
49170 .n(n)
49171 .k(1)
49172 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080049173 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049174 }
49175}
49176
49177TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1) {
49178 for (size_t k = 2; k < 10; k++) {
49179 GemmMicrokernelTester()
49180 .mr(1)
49181 .nr(4)
49182 .kr(1)
49183 .sr(1)
49184 .m(1)
49185 .n(4)
49186 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080049187 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049188 }
49189}
49190
49191TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_strided_a) {
49192 for (size_t k = 2; k < 10; k++) {
49193 GemmMicrokernelTester()
49194 .mr(1)
49195 .nr(4)
49196 .kr(1)
49197 .sr(1)
49198 .m(1)
49199 .n(4)
49200 .k(k)
49201 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080049202 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049203 }
49204}
49205
49206TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_subtile) {
49207 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080049208 for (uint32_t n = 1; n <= 4; n++) {
49209 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080049210 GemmMicrokernelTester()
49211 .mr(1)
49212 .nr(4)
49213 .kr(1)
49214 .sr(1)
49215 .m(m)
49216 .n(n)
49217 .k(k)
49218 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080049219 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049220 }
49221 }
49222 }
49223}
49224
49225TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4) {
49226 for (uint32_t n = 5; n < 8; n++) {
49227 for (size_t k = 1; k <= 5; k += 2) {
49228 GemmMicrokernelTester()
49229 .mr(1)
49230 .nr(4)
49231 .kr(1)
49232 .sr(1)
49233 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080049234 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080049235 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080049236 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049237 }
49238 }
49239}
49240
49241TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
49242 for (uint32_t n = 5; n < 8; n++) {
49243 for (size_t k = 1; k <= 5; k += 2) {
49244 GemmMicrokernelTester()
49245 .mr(1)
49246 .nr(4)
49247 .kr(1)
49248 .sr(1)
49249 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080049250 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080049251 .k(k)
49252 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080049253 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049254 }
49255 }
49256}
49257
49258TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_a) {
49259 for (uint32_t n = 5; n < 8; n++) {
49260 for (size_t k = 1; k <= 5; k += 2) {
49261 GemmMicrokernelTester()
49262 .mr(1)
49263 .nr(4)
49264 .kr(1)
49265 .sr(1)
49266 .m(1)
49267 .n(n)
49268 .k(k)
49269 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080049270 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049271 }
49272 }
49273}
49274
49275TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_subtile) {
49276 for (uint32_t n = 5; n < 8; n++) {
49277 for (size_t k = 1; k <= 5; k += 2) {
49278 for (uint32_t m = 1; m <= 1; m++) {
49279 GemmMicrokernelTester()
49280 .mr(1)
49281 .nr(4)
49282 .kr(1)
49283 .sr(1)
49284 .m(m)
49285 .n(n)
49286 .k(k)
49287 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080049288 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049289 }
49290 }
49291 }
49292}
49293
49294TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4) {
49295 for (uint32_t n = 8; n <= 12; n += 4) {
49296 for (size_t k = 1; k <= 5; k += 2) {
49297 GemmMicrokernelTester()
49298 .mr(1)
49299 .nr(4)
49300 .kr(1)
49301 .sr(1)
49302 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080049303 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080049304 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080049305 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049306 }
49307 }
49308}
49309
49310TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_cn) {
49311 for (uint32_t n = 8; n <= 12; n += 4) {
49312 for (size_t k = 1; k <= 5; k += 2) {
49313 GemmMicrokernelTester()
49314 .mr(1)
49315 .nr(4)
49316 .kr(1)
49317 .sr(1)
49318 .m(1)
49319 .n(n)
49320 .k(k)
49321 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080049322 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049323 }
49324 }
49325}
49326
49327TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_a) {
49328 for (uint32_t n = 8; n <= 12; n += 4) {
49329 for (size_t k = 1; k <= 5; k += 2) {
49330 GemmMicrokernelTester()
49331 .mr(1)
49332 .nr(4)
49333 .kr(1)
49334 .sr(1)
49335 .m(1)
49336 .n(n)
49337 .k(k)
49338 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080049339 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049340 }
49341 }
49342}
49343
49344TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_subtile) {
49345 for (uint32_t n = 8; n <= 12; n += 4) {
49346 for (size_t k = 1; k <= 5; k += 2) {
49347 for (uint32_t m = 1; m <= 1; m++) {
49348 GemmMicrokernelTester()
49349 .mr(1)
49350 .nr(4)
49351 .kr(1)
49352 .sr(1)
49353 .m(m)
49354 .n(n)
49355 .k(k)
49356 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080049357 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049358 }
49359 }
49360 }
49361}
49362
49363TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm_subtile) {
49364 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080049365 for (uint32_t n = 1; n <= 4; n++) {
49366 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080049367 GemmMicrokernelTester()
49368 .mr(1)
49369 .nr(4)
49370 .kr(1)
49371 .sr(1)
49372 .m(m)
49373 .n(n)
49374 .k(k)
49375 .cm_stride(7)
49376 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080049377 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049378 }
49379 }
49380 }
49381}
49382
49383TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmin) {
49384 GemmMicrokernelTester()
49385 .mr(1)
49386 .nr(4)
49387 .kr(1)
49388 .sr(1)
49389 .m(1)
49390 .n(4)
49391 .k(1)
49392 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080049393 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049394}
49395
49396TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmax) {
49397 GemmMicrokernelTester()
49398 .mr(1)
49399 .nr(4)
49400 .kr(1)
49401 .sr(1)
49402 .m(1)
49403 .n(4)
49404 .k(1)
49405 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080049406 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049407}
49408
49409TEST(QS8_GEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm) {
49410 GemmMicrokernelTester()
49411 .mr(1)
49412 .nr(4)
49413 .kr(1)
49414 .sr(1)
49415 .m(1)
49416 .n(4)
49417 .k(1)
49418 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080049419 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049420}
49421
49422
49423TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1) {
49424 GemmMicrokernelTester()
49425 .mr(2)
49426 .nr(4)
49427 .kr(1)
49428 .sr(1)
49429 .m(2)
49430 .n(4)
49431 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080049432 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049433}
49434
49435TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cn) {
49436 GemmMicrokernelTester()
49437 .mr(2)
49438 .nr(4)
49439 .kr(1)
49440 .sr(1)
49441 .m(2)
49442 .n(4)
49443 .k(1)
49444 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080049445 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049446}
49447
49448TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_strided_a) {
49449 GemmMicrokernelTester()
49450 .mr(2)
49451 .nr(4)
49452 .kr(1)
49453 .sr(1)
49454 .m(2)
49455 .n(4)
49456 .k(1)
49457 .a_stride(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080049458 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049459}
49460
49461TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080049462 for (uint32_t n = 1; n <= 4; n++) {
49463 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080049464 GemmMicrokernelTester()
49465 .mr(2)
49466 .nr(4)
49467 .kr(1)
49468 .sr(1)
49469 .m(m)
49470 .n(n)
49471 .k(1)
49472 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080049473 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049474 }
49475 }
49476}
49477
49478TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
49479 for (uint32_t m = 1; m <= 2; m++) {
49480 GemmMicrokernelTester()
49481 .mr(2)
49482 .nr(4)
49483 .kr(1)
49484 .sr(1)
49485 .m(m)
49486 .n(4)
49487 .k(1)
49488 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080049489 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049490 }
49491}
49492
49493TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
49494 for (uint32_t n = 1; n <= 4; n++) {
49495 GemmMicrokernelTester()
49496 .mr(2)
49497 .nr(4)
49498 .kr(1)
49499 .sr(1)
49500 .m(2)
49501 .n(n)
49502 .k(1)
49503 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080049504 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049505 }
49506}
49507
49508TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1) {
49509 for (size_t k = 2; k < 10; k++) {
49510 GemmMicrokernelTester()
49511 .mr(2)
49512 .nr(4)
49513 .kr(1)
49514 .sr(1)
49515 .m(2)
49516 .n(4)
49517 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080049518 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049519 }
49520}
49521
49522TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_strided_a) {
49523 for (size_t k = 2; k < 10; k++) {
49524 GemmMicrokernelTester()
49525 .mr(2)
49526 .nr(4)
49527 .kr(1)
49528 .sr(1)
49529 .m(2)
49530 .n(4)
49531 .k(k)
49532 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080049533 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049534 }
49535}
49536
49537TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_subtile) {
49538 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080049539 for (uint32_t n = 1; n <= 4; n++) {
49540 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080049541 GemmMicrokernelTester()
49542 .mr(2)
49543 .nr(4)
49544 .kr(1)
49545 .sr(1)
49546 .m(m)
49547 .n(n)
49548 .k(k)
49549 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080049550 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049551 }
49552 }
49553 }
49554}
49555
49556TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4) {
49557 for (uint32_t n = 5; n < 8; n++) {
49558 for (size_t k = 1; k <= 5; k += 2) {
49559 GemmMicrokernelTester()
49560 .mr(2)
49561 .nr(4)
49562 .kr(1)
49563 .sr(1)
49564 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080049565 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080049566 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080049567 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049568 }
49569 }
49570}
49571
49572TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
49573 for (uint32_t n = 5; n < 8; n++) {
49574 for (size_t k = 1; k <= 5; k += 2) {
49575 GemmMicrokernelTester()
49576 .mr(2)
49577 .nr(4)
49578 .kr(1)
49579 .sr(1)
49580 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080049581 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080049582 .k(k)
49583 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080049584 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049585 }
49586 }
49587}
49588
49589TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_a) {
49590 for (uint32_t n = 5; n < 8; n++) {
49591 for (size_t k = 1; k <= 5; k += 2) {
49592 GemmMicrokernelTester()
49593 .mr(2)
49594 .nr(4)
49595 .kr(1)
49596 .sr(1)
49597 .m(2)
49598 .n(n)
49599 .k(k)
49600 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080049601 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049602 }
49603 }
49604}
49605
49606TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_subtile) {
49607 for (uint32_t n = 5; n < 8; n++) {
49608 for (size_t k = 1; k <= 5; k += 2) {
49609 for (uint32_t m = 1; m <= 2; m++) {
49610 GemmMicrokernelTester()
49611 .mr(2)
49612 .nr(4)
49613 .kr(1)
49614 .sr(1)
49615 .m(m)
49616 .n(n)
49617 .k(k)
49618 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080049619 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049620 }
49621 }
49622 }
49623}
49624
49625TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4) {
49626 for (uint32_t n = 8; n <= 12; n += 4) {
49627 for (size_t k = 1; k <= 5; k += 2) {
49628 GemmMicrokernelTester()
49629 .mr(2)
49630 .nr(4)
49631 .kr(1)
49632 .sr(1)
49633 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080049634 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080049635 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080049636 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049637 }
49638 }
49639}
49640
49641TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_cn) {
49642 for (uint32_t n = 8; n <= 12; n += 4) {
49643 for (size_t k = 1; k <= 5; k += 2) {
49644 GemmMicrokernelTester()
49645 .mr(2)
49646 .nr(4)
49647 .kr(1)
49648 .sr(1)
49649 .m(2)
49650 .n(n)
49651 .k(k)
49652 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080049653 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049654 }
49655 }
49656}
49657
49658TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_a) {
49659 for (uint32_t n = 8; n <= 12; n += 4) {
49660 for (size_t k = 1; k <= 5; k += 2) {
49661 GemmMicrokernelTester()
49662 .mr(2)
49663 .nr(4)
49664 .kr(1)
49665 .sr(1)
49666 .m(2)
49667 .n(n)
49668 .k(k)
49669 .a_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080049670 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049671 }
49672 }
49673}
49674
49675TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_subtile) {
49676 for (uint32_t n = 8; n <= 12; n += 4) {
49677 for (size_t k = 1; k <= 5; k += 2) {
49678 for (uint32_t m = 1; m <= 2; m++) {
49679 GemmMicrokernelTester()
49680 .mr(2)
49681 .nr(4)
49682 .kr(1)
49683 .sr(1)
49684 .m(m)
49685 .n(n)
49686 .k(k)
49687 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080049688 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049689 }
49690 }
49691 }
49692}
49693
49694TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm_subtile) {
49695 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080049696 for (uint32_t n = 1; n <= 4; n++) {
49697 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080049698 GemmMicrokernelTester()
49699 .mr(2)
49700 .nr(4)
49701 .kr(1)
49702 .sr(1)
49703 .m(m)
49704 .n(n)
49705 .k(k)
49706 .cm_stride(7)
49707 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080049708 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049709 }
49710 }
49711 }
49712}
49713
49714TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmin) {
49715 GemmMicrokernelTester()
49716 .mr(2)
49717 .nr(4)
49718 .kr(1)
49719 .sr(1)
49720 .m(2)
49721 .n(4)
49722 .k(1)
49723 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080049724 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049725}
49726
49727TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmax) {
49728 GemmMicrokernelTester()
49729 .mr(2)
49730 .nr(4)
49731 .kr(1)
49732 .sr(1)
49733 .m(2)
49734 .n(4)
49735 .k(1)
49736 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080049737 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049738}
49739
49740TEST(QS8_GEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm) {
49741 GemmMicrokernelTester()
49742 .mr(2)
49743 .nr(4)
49744 .kr(1)
49745 .sr(1)
49746 .m(2)
49747 .n(4)
49748 .k(1)
49749 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080049750 .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080049751}