// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/qu8-igemm-minmax-fp32.yaml
// Generator: tools/generate-gemm-test.py
12
13
14#include <gtest/gtest.h>
15
Frank Barchard447aa7b2021-12-28 14:11:40 -080016#include <xnnpack/allocator.h>
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017#include <xnnpack/common.h>
18#include <xnnpack/isa-checks.h>
19
20#include <xnnpack/gemm.h>
21#include <xnnpack/igemm.h>
22#include <xnnpack/ppmm.h>
23#include "gemm-microkernel-tester.h"
24
25
Frank Barcharde4d3f762021-12-23 15:31:43 -080026#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barcharde4d3f762021-12-23 15:31:43 -080027 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16) {
28 TEST_REQUIRES_ARM_NEON_DOT;
29 GemmMicrokernelTester()
30 .mr(4)
31 .nr(16)
32 .kr(4)
33 .sr(1)
34 .m(4)
35 .n(16)
36 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080037 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -080038 }
39
40 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, strided_cn) {
41 TEST_REQUIRES_ARM_NEON_DOT;
42 GemmMicrokernelTester()
43 .mr(4)
44 .nr(16)
45 .kr(4)
46 .sr(1)
47 .m(4)
48 .n(16)
49 .k(16)
50 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080051 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -080052 }
53
54 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile) {
55 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -080056 for (uint32_t n = 1; n <= 16; n++) {
57 for (uint32_t m = 1; m <= 4; m++) {
Frank Barcharde4d3f762021-12-23 15:31:43 -080058 GemmMicrokernelTester()
59 .mr(4)
60 .nr(16)
61 .kr(4)
62 .sr(1)
63 .m(m)
64 .n(n)
65 .k(16)
66 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080067 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -080068 }
69 }
70 }
71
72 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_m) {
73 TEST_REQUIRES_ARM_NEON_DOT;
74 for (uint32_t m = 1; m <= 4; m++) {
75 GemmMicrokernelTester()
76 .mr(4)
77 .nr(16)
78 .kr(4)
79 .sr(1)
80 .m(m)
81 .n(16)
82 .k(16)
83 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080084 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -080085 }
86 }
87
88 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_n) {
89 TEST_REQUIRES_ARM_NEON_DOT;
90 for (uint32_t n = 1; n <= 16; n++) {
91 GemmMicrokernelTester()
92 .mr(4)
93 .nr(16)
94 .kr(4)
95 .sr(1)
96 .m(4)
97 .n(n)
98 .k(16)
99 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800100 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -0800101 }
102 }
103
104 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_lt_16) {
105 TEST_REQUIRES_ARM_NEON_DOT;
106 for (size_t k = 1; k < 16; k++) {
107 GemmMicrokernelTester()
108 .mr(4)
109 .nr(16)
110 .kr(4)
111 .sr(1)
112 .m(4)
113 .n(16)
114 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800115 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -0800116 }
117 }
118
119 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_lt_16_subtile) {
120 TEST_REQUIRES_ARM_NEON_DOT;
121 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800122 for (uint32_t n = 1; n <= 16; n++) {
123 for (uint32_t m = 1; m <= 4; m++) {
Frank Barcharde4d3f762021-12-23 15:31:43 -0800124 GemmMicrokernelTester()
125 .mr(4)
126 .nr(16)
127 .kr(4)
128 .sr(1)
129 .m(m)
130 .n(n)
131 .k(k)
132 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800133 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -0800134 }
135 }
136 }
137 }
138
139 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_gt_16) {
140 TEST_REQUIRES_ARM_NEON_DOT;
141 for (size_t k = 17; k < 32; k++) {
142 GemmMicrokernelTester()
143 .mr(4)
144 .nr(16)
145 .kr(4)
146 .sr(1)
147 .m(4)
148 .n(16)
149 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800150 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -0800151 }
152 }
153
154 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_gt_16_subtile) {
155 TEST_REQUIRES_ARM_NEON_DOT;
156 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800157 for (uint32_t n = 1; n <= 16; n++) {
158 for (uint32_t m = 1; m <= 4; m++) {
Frank Barcharde4d3f762021-12-23 15:31:43 -0800159 GemmMicrokernelTester()
160 .mr(4)
161 .nr(16)
162 .kr(4)
163 .sr(1)
164 .m(m)
165 .n(n)
166 .k(k)
167 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800168 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -0800169 }
170 }
171 }
172 }
173
174 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_div_16) {
175 TEST_REQUIRES_ARM_NEON_DOT;
176 for (size_t k = 32; k <= 160; k += 16) {
177 GemmMicrokernelTester()
178 .mr(4)
179 .nr(16)
180 .kr(4)
181 .sr(1)
182 .m(4)
183 .n(16)
184 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800185 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -0800186 }
187 }
188
189 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_div_16_subtile) {
190 TEST_REQUIRES_ARM_NEON_DOT;
191 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800192 for (uint32_t n = 1; n <= 16; n++) {
193 for (uint32_t m = 1; m <= 4; m++) {
Frank Barcharde4d3f762021-12-23 15:31:43 -0800194 GemmMicrokernelTester()
195 .mr(4)
196 .nr(16)
197 .kr(4)
198 .sr(1)
199 .m(m)
200 .n(n)
201 .k(k)
202 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800203 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -0800204 }
205 }
206 }
207 }
208
209 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16) {
210 TEST_REQUIRES_ARM_NEON_DOT;
211 for (uint32_t n = 17; n < 32; n++) {
212 for (size_t k = 1; k <= 80; k += 17) {
213 GemmMicrokernelTester()
214 .mr(4)
215 .nr(16)
216 .kr(4)
217 .sr(1)
218 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800219 .n(n)
Frank Barcharde4d3f762021-12-23 15:31:43 -0800220 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800221 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -0800222 }
223 }
224 }
225
226 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_strided_cn) {
227 TEST_REQUIRES_ARM_NEON_DOT;
228 for (uint32_t n = 17; n < 32; n++) {
229 for (size_t k = 1; k <= 80; k += 17) {
230 GemmMicrokernelTester()
231 .mr(4)
232 .nr(16)
233 .kr(4)
234 .sr(1)
235 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800236 .n(n)
Frank Barcharde4d3f762021-12-23 15:31:43 -0800237 .k(k)
238 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -0800239 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -0800240 }
241 }
242 }
243
244 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_subtile) {
245 TEST_REQUIRES_ARM_NEON_DOT;
246 for (uint32_t n = 17; n < 32; n++) {
247 for (size_t k = 1; k <= 80; k += 17) {
248 for (uint32_t m = 1; m <= 4; m++) {
249 GemmMicrokernelTester()
250 .mr(4)
251 .nr(16)
252 .kr(4)
253 .sr(1)
254 .m(m)
255 .n(n)
256 .k(k)
257 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800258 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -0800259 }
260 }
261 }
262 }
263
264 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16) {
265 TEST_REQUIRES_ARM_NEON_DOT;
266 for (uint32_t n = 32; n <= 48; n += 16) {
267 for (size_t k = 1; k <= 80; k += 17) {
268 GemmMicrokernelTester()
269 .mr(4)
270 .nr(16)
271 .kr(4)
272 .sr(1)
273 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800274 .n(n)
Frank Barcharde4d3f762021-12-23 15:31:43 -0800275 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800276 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -0800277 }
278 }
279 }
280
281 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_strided_cn) {
282 TEST_REQUIRES_ARM_NEON_DOT;
283 for (uint32_t n = 32; n <= 48; n += 16) {
284 for (size_t k = 1; k <= 80; k += 17) {
285 GemmMicrokernelTester()
286 .mr(4)
287 .nr(16)
288 .kr(4)
289 .sr(1)
290 .m(4)
291 .n(n)
292 .k(k)
293 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -0800294 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -0800295 }
296 }
297 }
298
299 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_subtile) {
300 TEST_REQUIRES_ARM_NEON_DOT;
301 for (uint32_t n = 32; n <= 48; n += 16) {
302 for (size_t k = 1; k <= 80; k += 17) {
303 for (uint32_t m = 1; m <= 4; m++) {
304 GemmMicrokernelTester()
305 .mr(4)
306 .nr(16)
307 .kr(4)
308 .sr(1)
309 .m(m)
310 .n(n)
311 .k(k)
312 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800313 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -0800314 }
315 }
316 }
317 }
318
319 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, small_kernel) {
320 TEST_REQUIRES_ARM_NEON_DOT;
321 for (size_t k = 1; k <= 80; k += 17) {
322 GemmMicrokernelTester()
323 .mr(4)
324 .nr(16)
325 .kr(4)
326 .sr(1)
327 .m(4)
328 .n(16)
329 .k(k)
330 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -0800331 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -0800332 }
333 }
334
335 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, small_kernel_subtile) {
336 TEST_REQUIRES_ARM_NEON_DOT;
337 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800338 for (uint32_t n = 1; n <= 16; n++) {
339 for (uint32_t m = 1; m <= 4; m++) {
Frank Barcharde4d3f762021-12-23 15:31:43 -0800340 GemmMicrokernelTester()
341 .mr(4)
342 .nr(16)
343 .kr(4)
344 .sr(1)
345 .m(m)
346 .n(n)
347 .k(k)
348 .ks(3)
349 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800350 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -0800351 }
352 }
353 }
354 }
355
356 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_small_kernel) {
357 TEST_REQUIRES_ARM_NEON_DOT;
358 for (uint32_t n = 17; n < 32; n++) {
359 for (size_t k = 1; k <= 80; k += 17) {
360 GemmMicrokernelTester()
361 .mr(4)
362 .nr(16)
363 .kr(4)
364 .sr(1)
365 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800366 .n(n)
Frank Barcharde4d3f762021-12-23 15:31:43 -0800367 .k(k)
368 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -0800369 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -0800370 }
371 }
372 }
373
374 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_small_kernel) {
375 TEST_REQUIRES_ARM_NEON_DOT;
376 for (uint32_t n = 32; n <= 48; n += 16) {
377 for (size_t k = 1; k <= 80; k += 17) {
378 GemmMicrokernelTester()
379 .mr(4)
380 .nr(16)
381 .kr(4)
382 .sr(1)
383 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800384 .n(n)
Frank Barcharde4d3f762021-12-23 15:31:43 -0800385 .k(k)
386 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -0800387 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -0800388 }
389 }
390 }
391
392 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, strided_cm_subtile) {
393 TEST_REQUIRES_ARM_NEON_DOT;
394 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800395 for (uint32_t n = 1; n <= 16; n++) {
396 for (uint32_t m = 1; m <= 4; m++) {
Frank Barcharde4d3f762021-12-23 15:31:43 -0800397 GemmMicrokernelTester()
398 .mr(4)
399 .nr(16)
400 .kr(4)
401 .sr(1)
402 .m(m)
403 .n(n)
404 .k(k)
405 .cm_stride(19)
406 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800407 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -0800408 }
409 }
410 }
411 }
412
413 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, a_offset) {
414 TEST_REQUIRES_ARM_NEON_DOT;
415 for (size_t k = 1; k <= 80; k += 17) {
416 GemmMicrokernelTester()
417 .mr(4)
418 .nr(16)
419 .kr(4)
420 .sr(1)
421 .m(4)
422 .n(16)
423 .k(k)
424 .ks(3)
425 .a_offset(331)
Marat Dukhan50323b82022-01-11 00:12:01 -0800426 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -0800427 }
428 }
429
430 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, zero) {
431 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800432 for (size_t k = 1; k <= 80; k += 17) {
433 for (uint32_t mz = 0; mz < 4; mz++) {
Frank Barcharde4d3f762021-12-23 15:31:43 -0800434 GemmMicrokernelTester()
435 .mr(4)
436 .nr(16)
437 .kr(4)
438 .sr(1)
439 .m(4)
440 .n(16)
441 .k(k)
442 .ks(3)
443 .a_offset(331)
444 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -0800445 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -0800446 }
447 }
448 }
449
450 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, qmin) {
451 TEST_REQUIRES_ARM_NEON_DOT;
452 GemmMicrokernelTester()
453 .mr(4)
454 .nr(16)
455 .kr(4)
456 .sr(1)
457 .m(4)
458 .n(16)
459 .k(16)
460 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800461 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -0800462 }
463
464 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, qmax) {
465 TEST_REQUIRES_ARM_NEON_DOT;
466 GemmMicrokernelTester()
467 .mr(4)
468 .nr(16)
469 .kr(4)
470 .sr(1)
471 .m(4)
472 .n(16)
473 .k(16)
474 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800475 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -0800476 }
477
478 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, strided_cm) {
479 TEST_REQUIRES_ARM_NEON_DOT;
480 GemmMicrokernelTester()
481 .mr(4)
482 .nr(16)
483 .kr(4)
484 .sr(1)
485 .m(4)
486 .n(16)
487 .k(16)
488 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -0800489 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -0800490 }
491
492 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, no_a_zero_point) {
493 TEST_REQUIRES_ARM_NEON_DOT;
494 for (size_t k = 1; k <= 80; k += 17) {
495 GemmMicrokernelTester()
496 .mr(4)
497 .nr(16)
498 .kr(4)
499 .sr(1)
500 .m(4)
501 .n(16)
502 .k(k)
503 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -0800504 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -0800505 }
506 }
507
508 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, no_b_zero_point) {
509 TEST_REQUIRES_ARM_NEON_DOT;
510 for (size_t k = 1; k <= 80; k += 17) {
511 GemmMicrokernelTester()
512 .mr(4)
513 .nr(16)
514 .kr(4)
515 .sr(1)
516 .m(4)
517 .n(16)
518 .k(k)
519 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -0800520 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -0800521 }
522 }
523
524 TEST(QU8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, no_zero_point) {
525 TEST_REQUIRES_ARM_NEON_DOT;
526 for (size_t k = 1; k <= 80; k += 17) {
527 GemmMicrokernelTester()
528 .mr(4)
529 .nr(16)
530 .kr(4)
531 .sr(1)
532 .m(4)
533 .n(16)
534 .k(k)
535 .a_zero_point(0)
536 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -0800537 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Frank Barcharde4d3f762021-12-23 15:31:43 -0800538 }
539 }
540#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
541
542
Marat Dukhan69c8a292021-07-14 19:34:56 -0700543#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan69c8a292021-07-14 19:34:56 -0700544 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8) {
545 TEST_REQUIRES_ARM_NEON;
546 GemmMicrokernelTester()
547 .mr(4)
548 .nr(16)
549 .kr(1)
550 .sr(1)
551 .m(4)
552 .n(16)
553 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -0800554 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -0700555 }
556
557 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cn) {
558 TEST_REQUIRES_ARM_NEON;
559 GemmMicrokernelTester()
560 .mr(4)
561 .nr(16)
562 .kr(1)
563 .sr(1)
564 .m(4)
565 .n(16)
566 .k(8)
567 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -0800568 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -0700569 }
570
571 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile) {
572 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800573 for (uint32_t n = 1; n <= 16; n++) {
574 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan69c8a292021-07-14 19:34:56 -0700575 GemmMicrokernelTester()
576 .mr(4)
577 .nr(16)
578 .kr(1)
579 .sr(1)
580 .m(m)
581 .n(n)
582 .k(8)
583 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800584 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -0700585 }
586 }
587 }
588
589 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
590 TEST_REQUIRES_ARM_NEON;
591 for (uint32_t m = 1; m <= 4; m++) {
592 GemmMicrokernelTester()
593 .mr(4)
594 .nr(16)
595 .kr(1)
596 .sr(1)
597 .m(m)
598 .n(16)
599 .k(8)
600 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800601 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -0700602 }
603 }
604
605 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
606 TEST_REQUIRES_ARM_NEON;
607 for (uint32_t n = 1; n <= 16; n++) {
608 GemmMicrokernelTester()
609 .mr(4)
610 .nr(16)
611 .kr(1)
612 .sr(1)
613 .m(4)
614 .n(n)
615 .k(8)
616 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800617 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -0700618 }
619 }
620
621 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_lt_8) {
622 TEST_REQUIRES_ARM_NEON;
623 for (size_t k = 1; k < 8; k++) {
624 GemmMicrokernelTester()
625 .mr(4)
626 .nr(16)
627 .kr(1)
628 .sr(1)
629 .m(4)
630 .n(16)
631 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800632 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -0700633 }
634 }
635
636 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_lt_8_subtile) {
637 TEST_REQUIRES_ARM_NEON;
638 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800639 for (uint32_t n = 1; n <= 16; n++) {
640 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan69c8a292021-07-14 19:34:56 -0700641 GemmMicrokernelTester()
642 .mr(4)
643 .nr(16)
644 .kr(1)
645 .sr(1)
646 .m(m)
647 .n(n)
648 .k(k)
649 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800650 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -0700651 }
652 }
653 }
654 }
655
656 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_gt_8) {
657 TEST_REQUIRES_ARM_NEON;
658 for (size_t k = 9; k < 16; k++) {
659 GemmMicrokernelTester()
660 .mr(4)
661 .nr(16)
662 .kr(1)
663 .sr(1)
664 .m(4)
665 .n(16)
666 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800667 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -0700668 }
669 }
670
671 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_gt_8_subtile) {
672 TEST_REQUIRES_ARM_NEON;
673 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800674 for (uint32_t n = 1; n <= 16; n++) {
675 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan69c8a292021-07-14 19:34:56 -0700676 GemmMicrokernelTester()
677 .mr(4)
678 .nr(16)
679 .kr(1)
680 .sr(1)
681 .m(m)
682 .n(n)
683 .k(k)
684 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800685 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -0700686 }
687 }
688 }
689 }
690
691 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_div_8) {
692 TEST_REQUIRES_ARM_NEON;
693 for (size_t k = 16; k <= 80; k += 8) {
694 GemmMicrokernelTester()
695 .mr(4)
696 .nr(16)
697 .kr(1)
698 .sr(1)
699 .m(4)
700 .n(16)
701 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800702 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -0700703 }
704 }
705
706 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_div_8_subtile) {
707 TEST_REQUIRES_ARM_NEON;
708 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800709 for (uint32_t n = 1; n <= 16; n++) {
710 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan69c8a292021-07-14 19:34:56 -0700711 GemmMicrokernelTester()
712 .mr(4)
713 .nr(16)
714 .kr(1)
715 .sr(1)
716 .m(m)
717 .n(n)
718 .k(k)
719 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800720 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -0700721 }
722 }
723 }
724 }
725
726 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16) {
727 TEST_REQUIRES_ARM_NEON;
728 for (uint32_t n = 17; n < 32; n++) {
729 for (size_t k = 1; k <= 40; k += 9) {
730 GemmMicrokernelTester()
731 .mr(4)
732 .nr(16)
733 .kr(1)
734 .sr(1)
735 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800736 .n(n)
Marat Dukhan69c8a292021-07-14 19:34:56 -0700737 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800738 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -0700739 }
740 }
741 }
742
743 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
744 TEST_REQUIRES_ARM_NEON;
745 for (uint32_t n = 17; n < 32; n++) {
746 for (size_t k = 1; k <= 40; k += 9) {
747 GemmMicrokernelTester()
748 .mr(4)
749 .nr(16)
750 .kr(1)
751 .sr(1)
752 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800753 .n(n)
Marat Dukhan69c8a292021-07-14 19:34:56 -0700754 .k(k)
755 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -0800756 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -0700757 }
758 }
759 }
760
761 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_subtile) {
762 TEST_REQUIRES_ARM_NEON;
763 for (uint32_t n = 17; n < 32; n++) {
764 for (size_t k = 1; k <= 40; k += 9) {
765 for (uint32_t m = 1; m <= 4; m++) {
766 GemmMicrokernelTester()
767 .mr(4)
768 .nr(16)
769 .kr(1)
770 .sr(1)
771 .m(m)
772 .n(n)
773 .k(k)
774 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800775 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -0700776 }
777 }
778 }
779 }
780
781 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16) {
782 TEST_REQUIRES_ARM_NEON;
783 for (uint32_t n = 32; n <= 48; n += 16) {
784 for (size_t k = 1; k <= 40; k += 9) {
785 GemmMicrokernelTester()
786 .mr(4)
787 .nr(16)
788 .kr(1)
789 .sr(1)
790 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800791 .n(n)
Marat Dukhan69c8a292021-07-14 19:34:56 -0700792 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800793 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -0700794 }
795 }
796 }
797
798 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
799 TEST_REQUIRES_ARM_NEON;
800 for (uint32_t n = 32; n <= 48; n += 16) {
801 for (size_t k = 1; k <= 40; k += 9) {
802 GemmMicrokernelTester()
803 .mr(4)
804 .nr(16)
805 .kr(1)
806 .sr(1)
807 .m(4)
808 .n(n)
809 .k(k)
810 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -0800811 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -0700812 }
813 }
814 }
815
816 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_subtile) {
817 TEST_REQUIRES_ARM_NEON;
818 for (uint32_t n = 32; n <= 48; n += 16) {
819 for (size_t k = 1; k <= 40; k += 9) {
820 for (uint32_t m = 1; m <= 4; m++) {
821 GemmMicrokernelTester()
822 .mr(4)
823 .nr(16)
824 .kr(1)
825 .sr(1)
826 .m(m)
827 .n(n)
828 .k(k)
829 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800830 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -0700831 }
832 }
833 }
834 }
835
836 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, small_kernel) {
837 TEST_REQUIRES_ARM_NEON;
838 for (size_t k = 1; k <= 40; k += 9) {
839 GemmMicrokernelTester()
840 .mr(4)
841 .nr(16)
842 .kr(1)
843 .sr(1)
844 .m(4)
845 .n(16)
846 .k(k)
847 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -0800848 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -0700849 }
850 }
851
852 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, small_kernel_subtile) {
853 TEST_REQUIRES_ARM_NEON;
854 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800855 for (uint32_t n = 1; n <= 16; n++) {
856 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan69c8a292021-07-14 19:34:56 -0700857 GemmMicrokernelTester()
858 .mr(4)
859 .nr(16)
860 .kr(1)
861 .sr(1)
862 .m(m)
863 .n(n)
864 .k(k)
865 .ks(3)
866 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800867 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -0700868 }
869 }
870 }
871 }
872
873 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
874 TEST_REQUIRES_ARM_NEON;
875 for (uint32_t n = 17; n < 32; n++) {
876 for (size_t k = 1; k <= 40; k += 9) {
877 GemmMicrokernelTester()
878 .mr(4)
879 .nr(16)
880 .kr(1)
881 .sr(1)
882 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800883 .n(n)
Marat Dukhan69c8a292021-07-14 19:34:56 -0700884 .k(k)
885 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -0800886 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -0700887 }
888 }
889 }
890
891 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
892 TEST_REQUIRES_ARM_NEON;
893 for (uint32_t n = 32; n <= 48; n += 16) {
894 for (size_t k = 1; k <= 40; k += 9) {
895 GemmMicrokernelTester()
896 .mr(4)
897 .nr(16)
898 .kr(1)
899 .sr(1)
900 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800901 .n(n)
Marat Dukhan69c8a292021-07-14 19:34:56 -0700902 .k(k)
903 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -0800904 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -0700905 }
906 }
907 }
908
909 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cm_subtile) {
910 TEST_REQUIRES_ARM_NEON;
911 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800912 for (uint32_t n = 1; n <= 16; n++) {
913 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan69c8a292021-07-14 19:34:56 -0700914 GemmMicrokernelTester()
915 .mr(4)
916 .nr(16)
917 .kr(1)
918 .sr(1)
919 .m(m)
920 .n(n)
921 .k(k)
922 .cm_stride(19)
923 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800924 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -0700925 }
926 }
927 }
928 }
929
930 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, a_offset) {
931 TEST_REQUIRES_ARM_NEON;
932 for (size_t k = 1; k <= 40; k += 9) {
933 GemmMicrokernelTester()
934 .mr(4)
935 .nr(16)
936 .kr(1)
937 .sr(1)
938 .m(4)
939 .n(16)
940 .k(k)
941 .ks(3)
942 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -0800943 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -0700944 }
945 }
946
947 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, zero) {
948 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800949 for (size_t k = 1; k <= 40; k += 9) {
950 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan69c8a292021-07-14 19:34:56 -0700951 GemmMicrokernelTester()
952 .mr(4)
953 .nr(16)
954 .kr(1)
955 .sr(1)
956 .m(4)
957 .n(16)
958 .k(k)
959 .ks(3)
960 .a_offset(163)
961 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -0800962 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -0700963 }
964 }
965 }
966
967 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, qmin) {
968 TEST_REQUIRES_ARM_NEON;
969 GemmMicrokernelTester()
970 .mr(4)
971 .nr(16)
972 .kr(1)
973 .sr(1)
974 .m(4)
975 .n(16)
976 .k(8)
977 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800978 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -0700979 }
980
981 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, qmax) {
982 TEST_REQUIRES_ARM_NEON;
983 GemmMicrokernelTester()
984 .mr(4)
985 .nr(16)
986 .kr(1)
987 .sr(1)
988 .m(4)
989 .n(16)
990 .k(8)
991 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800992 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -0700993 }
994
995 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cm) {
996 TEST_REQUIRES_ARM_NEON;
997 GemmMicrokernelTester()
998 .mr(4)
999 .nr(16)
1000 .kr(1)
1001 .sr(1)
1002 .m(4)
1003 .n(16)
1004 .k(8)
1005 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001006 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001007 }
1008
1009 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, no_a_zero_point) {
1010 TEST_REQUIRES_ARM_NEON;
1011 for (size_t k = 1; k <= 40; k += 9) {
1012 GemmMicrokernelTester()
1013 .mr(4)
1014 .nr(16)
1015 .kr(1)
1016 .sr(1)
1017 .m(4)
1018 .n(16)
1019 .k(k)
1020 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08001021 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001022 }
1023 }
1024
1025 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, no_b_zero_point) {
1026 TEST_REQUIRES_ARM_NEON;
1027 for (size_t k = 1; k <= 40; k += 9) {
1028 GemmMicrokernelTester()
1029 .mr(4)
1030 .nr(16)
1031 .kr(1)
1032 .sr(1)
1033 .m(4)
1034 .n(16)
1035 .k(k)
1036 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08001037 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001038 }
1039 }
1040
1041 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, no_zero_point) {
1042 TEST_REQUIRES_ARM_NEON;
1043 for (size_t k = 1; k <= 40; k += 9) {
1044 GemmMicrokernelTester()
1045 .mr(4)
1046 .nr(16)
1047 .kr(1)
1048 .sr(1)
1049 .m(4)
1050 .n(16)
1051 .k(k)
1052 .a_zero_point(0)
1053 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08001054 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neon_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001055 }
1056 }
1057#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1058
1059
1060#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1061 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8) {
1062 TEST_REQUIRES_ARM_NEON_V8;
1063 GemmMicrokernelTester()
1064 .mr(1)
1065 .nr(16)
1066 .kr(1)
1067 .sr(1)
1068 .m(1)
1069 .n(16)
1070 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08001071 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001072 }
1073
1074 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, strided_cn) {
1075 TEST_REQUIRES_ARM_NEON_V8;
1076 GemmMicrokernelTester()
1077 .mr(1)
1078 .nr(16)
1079 .kr(1)
1080 .sr(1)
1081 .m(1)
1082 .n(16)
1083 .k(8)
1084 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001085 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001086 }
1087
1088 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8_subtile) {
1089 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001090 for (uint32_t n = 1; n <= 16; n++) {
1091 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan69c8a292021-07-14 19:34:56 -07001092 GemmMicrokernelTester()
1093 .mr(1)
1094 .nr(16)
1095 .kr(1)
1096 .sr(1)
1097 .m(m)
1098 .n(n)
1099 .k(8)
1100 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001101 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001102 }
1103 }
1104 }
1105
1106 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
1107 TEST_REQUIRES_ARM_NEON_V8;
1108 for (uint32_t m = 1; m <= 1; m++) {
1109 GemmMicrokernelTester()
1110 .mr(1)
1111 .nr(16)
1112 .kr(1)
1113 .sr(1)
1114 .m(m)
1115 .n(16)
1116 .k(8)
1117 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001118 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001119 }
1120 }
1121
1122 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
1123 TEST_REQUIRES_ARM_NEON_V8;
1124 for (uint32_t n = 1; n <= 16; n++) {
1125 GemmMicrokernelTester()
1126 .mr(1)
1127 .nr(16)
1128 .kr(1)
1129 .sr(1)
1130 .m(1)
1131 .n(n)
1132 .k(8)
1133 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001134 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001135 }
1136 }
1137
1138 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_lt_8) {
1139 TEST_REQUIRES_ARM_NEON_V8;
1140 for (size_t k = 1; k < 8; k++) {
1141 GemmMicrokernelTester()
1142 .mr(1)
1143 .nr(16)
1144 .kr(1)
1145 .sr(1)
1146 .m(1)
1147 .n(16)
1148 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001149 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001150 }
1151 }
1152
1153 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_lt_8_subtile) {
1154 TEST_REQUIRES_ARM_NEON_V8;
1155 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001156 for (uint32_t n = 1; n <= 16; n++) {
1157 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan69c8a292021-07-14 19:34:56 -07001158 GemmMicrokernelTester()
1159 .mr(1)
1160 .nr(16)
1161 .kr(1)
1162 .sr(1)
1163 .m(m)
1164 .n(n)
1165 .k(k)
1166 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001167 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001168 }
1169 }
1170 }
1171 }
1172
1173 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_gt_8) {
1174 TEST_REQUIRES_ARM_NEON_V8;
1175 for (size_t k = 9; k < 16; k++) {
1176 GemmMicrokernelTester()
1177 .mr(1)
1178 .nr(16)
1179 .kr(1)
1180 .sr(1)
1181 .m(1)
1182 .n(16)
1183 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001184 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001185 }
1186 }
1187
1188 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_gt_8_subtile) {
1189 TEST_REQUIRES_ARM_NEON_V8;
1190 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001191 for (uint32_t n = 1; n <= 16; n++) {
1192 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan69c8a292021-07-14 19:34:56 -07001193 GemmMicrokernelTester()
1194 .mr(1)
1195 .nr(16)
1196 .kr(1)
1197 .sr(1)
1198 .m(m)
1199 .n(n)
1200 .k(k)
1201 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001202 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001203 }
1204 }
1205 }
1206 }
1207
1208 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_div_8) {
1209 TEST_REQUIRES_ARM_NEON_V8;
1210 for (size_t k = 16; k <= 80; k += 8) {
1211 GemmMicrokernelTester()
1212 .mr(1)
1213 .nr(16)
1214 .kr(1)
1215 .sr(1)
1216 .m(1)
1217 .n(16)
1218 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001219 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001220 }
1221 }
1222
1223 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_div_8_subtile) {
1224 TEST_REQUIRES_ARM_NEON_V8;
1225 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001226 for (uint32_t n = 1; n <= 16; n++) {
1227 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan69c8a292021-07-14 19:34:56 -07001228 GemmMicrokernelTester()
1229 .mr(1)
1230 .nr(16)
1231 .kr(1)
1232 .sr(1)
1233 .m(m)
1234 .n(n)
1235 .k(k)
1236 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001237 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001238 }
1239 }
1240 }
1241 }
1242
1243 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16) {
1244 TEST_REQUIRES_ARM_NEON_V8;
1245 for (uint32_t n = 17; n < 32; n++) {
1246 for (size_t k = 1; k <= 40; k += 9) {
1247 GemmMicrokernelTester()
1248 .mr(1)
1249 .nr(16)
1250 .kr(1)
1251 .sr(1)
1252 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001253 .n(n)
Marat Dukhan69c8a292021-07-14 19:34:56 -07001254 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001255 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001256 }
1257 }
1258 }
1259
1260 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16_strided_cn) {
1261 TEST_REQUIRES_ARM_NEON_V8;
1262 for (uint32_t n = 17; n < 32; n++) {
1263 for (size_t k = 1; k <= 40; k += 9) {
1264 GemmMicrokernelTester()
1265 .mr(1)
1266 .nr(16)
1267 .kr(1)
1268 .sr(1)
1269 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001270 .n(n)
Marat Dukhan69c8a292021-07-14 19:34:56 -07001271 .k(k)
1272 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001273 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001274 }
1275 }
1276 }
1277
1278 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16_subtile) {
1279 TEST_REQUIRES_ARM_NEON_V8;
1280 for (uint32_t n = 17; n < 32; n++) {
1281 for (size_t k = 1; k <= 40; k += 9) {
1282 for (uint32_t m = 1; m <= 1; m++) {
1283 GemmMicrokernelTester()
1284 .mr(1)
1285 .nr(16)
1286 .kr(1)
1287 .sr(1)
1288 .m(m)
1289 .n(n)
1290 .k(k)
1291 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001292 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001293 }
1294 }
1295 }
1296 }
1297
1298 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16) {
1299 TEST_REQUIRES_ARM_NEON_V8;
1300 for (uint32_t n = 32; n <= 48; n += 16) {
1301 for (size_t k = 1; k <= 40; k += 9) {
1302 GemmMicrokernelTester()
1303 .mr(1)
1304 .nr(16)
1305 .kr(1)
1306 .sr(1)
1307 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001308 .n(n)
Marat Dukhan69c8a292021-07-14 19:34:56 -07001309 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001310 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001311 }
1312 }
1313 }
1314
1315 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16_strided_cn) {
1316 TEST_REQUIRES_ARM_NEON_V8;
1317 for (uint32_t n = 32; n <= 48; n += 16) {
1318 for (size_t k = 1; k <= 40; k += 9) {
1319 GemmMicrokernelTester()
1320 .mr(1)
1321 .nr(16)
1322 .kr(1)
1323 .sr(1)
1324 .m(1)
1325 .n(n)
1326 .k(k)
1327 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001328 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001329 }
1330 }
1331 }
1332
1333 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16_subtile) {
1334 TEST_REQUIRES_ARM_NEON_V8;
1335 for (uint32_t n = 32; n <= 48; n += 16) {
1336 for (size_t k = 1; k <= 40; k += 9) {
1337 for (uint32_t m = 1; m <= 1; m++) {
1338 GemmMicrokernelTester()
1339 .mr(1)
1340 .nr(16)
1341 .kr(1)
1342 .sr(1)
1343 .m(m)
1344 .n(n)
1345 .k(k)
1346 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001347 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001348 }
1349 }
1350 }
1351 }
1352
1353 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, small_kernel) {
1354 TEST_REQUIRES_ARM_NEON_V8;
1355 for (size_t k = 1; k <= 40; k += 9) {
1356 GemmMicrokernelTester()
1357 .mr(1)
1358 .nr(16)
1359 .kr(1)
1360 .sr(1)
1361 .m(1)
1362 .n(16)
1363 .k(k)
1364 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001365 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001366 }
1367 }
1368
1369 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, small_kernel_subtile) {
1370 TEST_REQUIRES_ARM_NEON_V8;
1371 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001372 for (uint32_t n = 1; n <= 16; n++) {
1373 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan69c8a292021-07-14 19:34:56 -07001374 GemmMicrokernelTester()
1375 .mr(1)
1376 .nr(16)
1377 .kr(1)
1378 .sr(1)
1379 .m(m)
1380 .n(n)
1381 .k(k)
1382 .ks(3)
1383 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001384 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001385 }
1386 }
1387 }
1388 }
1389
1390 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16_small_kernel) {
1391 TEST_REQUIRES_ARM_NEON_V8;
1392 for (uint32_t n = 17; n < 32; n++) {
1393 for (size_t k = 1; k <= 40; k += 9) {
1394 GemmMicrokernelTester()
1395 .mr(1)
1396 .nr(16)
1397 .kr(1)
1398 .sr(1)
1399 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001400 .n(n)
Marat Dukhan69c8a292021-07-14 19:34:56 -07001401 .k(k)
1402 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001403 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001404 }
1405 }
1406 }
1407
1408 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16_small_kernel) {
1409 TEST_REQUIRES_ARM_NEON_V8;
1410 for (uint32_t n = 32; n <= 48; n += 16) {
1411 for (size_t k = 1; k <= 40; k += 9) {
1412 GemmMicrokernelTester()
1413 .mr(1)
1414 .nr(16)
1415 .kr(1)
1416 .sr(1)
1417 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001418 .n(n)
Marat Dukhan69c8a292021-07-14 19:34:56 -07001419 .k(k)
1420 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001421 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001422 }
1423 }
1424 }
1425
1426 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, strided_cm_subtile) {
1427 TEST_REQUIRES_ARM_NEON_V8;
1428 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001429 for (uint32_t n = 1; n <= 16; n++) {
1430 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan69c8a292021-07-14 19:34:56 -07001431 GemmMicrokernelTester()
1432 .mr(1)
1433 .nr(16)
1434 .kr(1)
1435 .sr(1)
1436 .m(m)
1437 .n(n)
1438 .k(k)
1439 .cm_stride(19)
1440 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001441 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001442 }
1443 }
1444 }
1445 }
1446
1447 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, a_offset) {
1448 TEST_REQUIRES_ARM_NEON_V8;
1449 for (size_t k = 1; k <= 40; k += 9) {
1450 GemmMicrokernelTester()
1451 .mr(1)
1452 .nr(16)
1453 .kr(1)
1454 .sr(1)
1455 .m(1)
1456 .n(16)
1457 .k(k)
1458 .ks(3)
1459 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08001460 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001461 }
1462 }
1463
1464 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, zero) {
1465 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001466 for (size_t k = 1; k <= 40; k += 9) {
1467 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan69c8a292021-07-14 19:34:56 -07001468 GemmMicrokernelTester()
1469 .mr(1)
1470 .nr(16)
1471 .kr(1)
1472 .sr(1)
1473 .m(1)
1474 .n(16)
1475 .k(k)
1476 .ks(3)
1477 .a_offset(43)
1478 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08001479 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001480 }
1481 }
1482 }
1483
1484 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, qmin) {
1485 TEST_REQUIRES_ARM_NEON_V8;
1486 GemmMicrokernelTester()
1487 .mr(1)
1488 .nr(16)
1489 .kr(1)
1490 .sr(1)
1491 .m(1)
1492 .n(16)
1493 .k(8)
1494 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001495 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001496 }
1497
1498 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, qmax) {
1499 TEST_REQUIRES_ARM_NEON_V8;
1500 GemmMicrokernelTester()
1501 .mr(1)
1502 .nr(16)
1503 .kr(1)
1504 .sr(1)
1505 .m(1)
1506 .n(16)
1507 .k(8)
1508 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001509 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001510 }
1511
1512 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, strided_cm) {
1513 TEST_REQUIRES_ARM_NEON_V8;
1514 GemmMicrokernelTester()
1515 .mr(1)
1516 .nr(16)
1517 .kr(1)
1518 .sr(1)
1519 .m(1)
1520 .n(16)
1521 .k(8)
1522 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001523 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001524 }
1525
1526 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, no_a_zero_point) {
1527 TEST_REQUIRES_ARM_NEON_V8;
1528 for (size_t k = 1; k <= 40; k += 9) {
1529 GemmMicrokernelTester()
1530 .mr(1)
1531 .nr(16)
1532 .kr(1)
1533 .sr(1)
1534 .m(1)
1535 .n(16)
1536 .k(k)
1537 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08001538 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001539 }
1540 }
1541
1542 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, no_b_zero_point) {
1543 TEST_REQUIRES_ARM_NEON_V8;
1544 for (size_t k = 1; k <= 40; k += 9) {
1545 GemmMicrokernelTester()
1546 .mr(1)
1547 .nr(16)
1548 .kr(1)
1549 .sr(1)
1550 .m(1)
1551 .n(16)
1552 .k(k)
1553 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08001554 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001555 }
1556 }
1557
1558 TEST(QU8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, no_zero_point) {
1559 TEST_REQUIRES_ARM_NEON_V8;
1560 for (size_t k = 1; k <= 40; k += 9) {
1561 GemmMicrokernelTester()
1562 .mr(1)
1563 .nr(16)
1564 .kr(1)
1565 .sr(1)
1566 .m(1)
1567 .n(16)
1568 .k(k)
1569 .a_zero_point(0)
1570 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08001571 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001572 }
1573 }
1574#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1575
1576
1577#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1578 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8) {
1579 TEST_REQUIRES_ARM_NEON_V8;
1580 GemmMicrokernelTester()
1581 .mr(4)
1582 .nr(16)
1583 .kr(1)
1584 .sr(1)
1585 .m(4)
1586 .n(16)
1587 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08001588 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001589 }
1590
1591 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, strided_cn) {
1592 TEST_REQUIRES_ARM_NEON_V8;
1593 GemmMicrokernelTester()
1594 .mr(4)
1595 .nr(16)
1596 .kr(1)
1597 .sr(1)
1598 .m(4)
1599 .n(16)
1600 .k(8)
1601 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001602 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001603 }
1604
1605 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_subtile) {
1606 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001607 for (uint32_t n = 1; n <= 16; n++) {
1608 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan69c8a292021-07-14 19:34:56 -07001609 GemmMicrokernelTester()
1610 .mr(4)
1611 .nr(16)
1612 .kr(1)
1613 .sr(1)
1614 .m(m)
1615 .n(n)
1616 .k(8)
1617 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001618 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001619 }
1620 }
1621 }
1622
1623 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
1624 TEST_REQUIRES_ARM_NEON_V8;
1625 for (uint32_t m = 1; m <= 4; m++) {
1626 GemmMicrokernelTester()
1627 .mr(4)
1628 .nr(16)
1629 .kr(1)
1630 .sr(1)
1631 .m(m)
1632 .n(16)
1633 .k(8)
1634 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001635 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001636 }
1637 }
1638
1639 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
1640 TEST_REQUIRES_ARM_NEON_V8;
1641 for (uint32_t n = 1; n <= 16; n++) {
1642 GemmMicrokernelTester()
1643 .mr(4)
1644 .nr(16)
1645 .kr(1)
1646 .sr(1)
1647 .m(4)
1648 .n(n)
1649 .k(8)
1650 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001651 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001652 }
1653 }
1654
1655 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_lt_8) {
1656 TEST_REQUIRES_ARM_NEON_V8;
1657 for (size_t k = 1; k < 8; k++) {
1658 GemmMicrokernelTester()
1659 .mr(4)
1660 .nr(16)
1661 .kr(1)
1662 .sr(1)
1663 .m(4)
1664 .n(16)
1665 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001666 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001667 }
1668 }
1669
1670 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_lt_8_subtile) {
1671 TEST_REQUIRES_ARM_NEON_V8;
1672 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001673 for (uint32_t n = 1; n <= 16; n++) {
1674 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan69c8a292021-07-14 19:34:56 -07001675 GemmMicrokernelTester()
1676 .mr(4)
1677 .nr(16)
1678 .kr(1)
1679 .sr(1)
1680 .m(m)
1681 .n(n)
1682 .k(k)
1683 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001684 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001685 }
1686 }
1687 }
1688 }
1689
1690 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_gt_8) {
1691 TEST_REQUIRES_ARM_NEON_V8;
1692 for (size_t k = 9; k < 16; k++) {
1693 GemmMicrokernelTester()
1694 .mr(4)
1695 .nr(16)
1696 .kr(1)
1697 .sr(1)
1698 .m(4)
1699 .n(16)
1700 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001701 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001702 }
1703 }
1704
1705 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_gt_8_subtile) {
1706 TEST_REQUIRES_ARM_NEON_V8;
1707 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001708 for (uint32_t n = 1; n <= 16; n++) {
1709 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan69c8a292021-07-14 19:34:56 -07001710 GemmMicrokernelTester()
1711 .mr(4)
1712 .nr(16)
1713 .kr(1)
1714 .sr(1)
1715 .m(m)
1716 .n(n)
1717 .k(k)
1718 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001719 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001720 }
1721 }
1722 }
1723 }
1724
1725 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_div_8) {
1726 TEST_REQUIRES_ARM_NEON_V8;
1727 for (size_t k = 16; k <= 80; k += 8) {
1728 GemmMicrokernelTester()
1729 .mr(4)
1730 .nr(16)
1731 .kr(1)
1732 .sr(1)
1733 .m(4)
1734 .n(16)
1735 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001736 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001737 }
1738 }
1739
1740 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_div_8_subtile) {
1741 TEST_REQUIRES_ARM_NEON_V8;
1742 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001743 for (uint32_t n = 1; n <= 16; n++) {
1744 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan69c8a292021-07-14 19:34:56 -07001745 GemmMicrokernelTester()
1746 .mr(4)
1747 .nr(16)
1748 .kr(1)
1749 .sr(1)
1750 .m(m)
1751 .n(n)
1752 .k(k)
1753 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001754 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001755 }
1756 }
1757 }
1758 }
1759
1760 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16) {
1761 TEST_REQUIRES_ARM_NEON_V8;
1762 for (uint32_t n = 17; n < 32; n++) {
1763 for (size_t k = 1; k <= 40; k += 9) {
1764 GemmMicrokernelTester()
1765 .mr(4)
1766 .nr(16)
1767 .kr(1)
1768 .sr(1)
1769 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001770 .n(n)
Marat Dukhan69c8a292021-07-14 19:34:56 -07001771 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001772 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001773 }
1774 }
1775 }
1776
1777 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16_strided_cn) {
1778 TEST_REQUIRES_ARM_NEON_V8;
1779 for (uint32_t n = 17; n < 32; n++) {
1780 for (size_t k = 1; k <= 40; k += 9) {
1781 GemmMicrokernelTester()
1782 .mr(4)
1783 .nr(16)
1784 .kr(1)
1785 .sr(1)
1786 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001787 .n(n)
Marat Dukhan69c8a292021-07-14 19:34:56 -07001788 .k(k)
1789 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001790 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001791 }
1792 }
1793 }
1794
1795 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16_subtile) {
1796 TEST_REQUIRES_ARM_NEON_V8;
1797 for (uint32_t n = 17; n < 32; n++) {
1798 for (size_t k = 1; k <= 40; k += 9) {
1799 for (uint32_t m = 1; m <= 4; m++) {
1800 GemmMicrokernelTester()
1801 .mr(4)
1802 .nr(16)
1803 .kr(1)
1804 .sr(1)
1805 .m(m)
1806 .n(n)
1807 .k(k)
1808 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001809 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001810 }
1811 }
1812 }
1813 }
1814
1815 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16) {
1816 TEST_REQUIRES_ARM_NEON_V8;
1817 for (uint32_t n = 32; n <= 48; n += 16) {
1818 for (size_t k = 1; k <= 40; k += 9) {
1819 GemmMicrokernelTester()
1820 .mr(4)
1821 .nr(16)
1822 .kr(1)
1823 .sr(1)
1824 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001825 .n(n)
Marat Dukhan69c8a292021-07-14 19:34:56 -07001826 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001827 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001828 }
1829 }
1830 }
1831
1832 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16_strided_cn) {
1833 TEST_REQUIRES_ARM_NEON_V8;
1834 for (uint32_t n = 32; n <= 48; n += 16) {
1835 for (size_t k = 1; k <= 40; k += 9) {
1836 GemmMicrokernelTester()
1837 .mr(4)
1838 .nr(16)
1839 .kr(1)
1840 .sr(1)
1841 .m(4)
1842 .n(n)
1843 .k(k)
1844 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001845 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001846 }
1847 }
1848 }
1849
1850 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16_subtile) {
1851 TEST_REQUIRES_ARM_NEON_V8;
1852 for (uint32_t n = 32; n <= 48; n += 16) {
1853 for (size_t k = 1; k <= 40; k += 9) {
1854 for (uint32_t m = 1; m <= 4; m++) {
1855 GemmMicrokernelTester()
1856 .mr(4)
1857 .nr(16)
1858 .kr(1)
1859 .sr(1)
1860 .m(m)
1861 .n(n)
1862 .k(k)
1863 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001864 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001865 }
1866 }
1867 }
1868 }
1869
1870 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, small_kernel) {
1871 TEST_REQUIRES_ARM_NEON_V8;
1872 for (size_t k = 1; k <= 40; k += 9) {
1873 GemmMicrokernelTester()
1874 .mr(4)
1875 .nr(16)
1876 .kr(1)
1877 .sr(1)
1878 .m(4)
1879 .n(16)
1880 .k(k)
1881 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001882 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001883 }
1884 }
1885
1886 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, small_kernel_subtile) {
1887 TEST_REQUIRES_ARM_NEON_V8;
1888 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001889 for (uint32_t n = 1; n <= 16; n++) {
1890 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan69c8a292021-07-14 19:34:56 -07001891 GemmMicrokernelTester()
1892 .mr(4)
1893 .nr(16)
1894 .kr(1)
1895 .sr(1)
1896 .m(m)
1897 .n(n)
1898 .k(k)
1899 .ks(3)
1900 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001901 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001902 }
1903 }
1904 }
1905 }
1906
1907 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16_small_kernel) {
1908 TEST_REQUIRES_ARM_NEON_V8;
1909 for (uint32_t n = 17; n < 32; n++) {
1910 for (size_t k = 1; k <= 40; k += 9) {
1911 GemmMicrokernelTester()
1912 .mr(4)
1913 .nr(16)
1914 .kr(1)
1915 .sr(1)
1916 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001917 .n(n)
Marat Dukhan69c8a292021-07-14 19:34:56 -07001918 .k(k)
1919 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001920 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001921 }
1922 }
1923 }
1924
1925 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16_small_kernel) {
1926 TEST_REQUIRES_ARM_NEON_V8;
1927 for (uint32_t n = 32; n <= 48; n += 16) {
1928 for (size_t k = 1; k <= 40; k += 9) {
1929 GemmMicrokernelTester()
1930 .mr(4)
1931 .nr(16)
1932 .kr(1)
1933 .sr(1)
1934 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001935 .n(n)
Marat Dukhan69c8a292021-07-14 19:34:56 -07001936 .k(k)
1937 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001938 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001939 }
1940 }
1941 }
1942
1943 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, strided_cm_subtile) {
1944 TEST_REQUIRES_ARM_NEON_V8;
1945 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001946 for (uint32_t n = 1; n <= 16; n++) {
1947 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan69c8a292021-07-14 19:34:56 -07001948 GemmMicrokernelTester()
1949 .mr(4)
1950 .nr(16)
1951 .kr(1)
1952 .sr(1)
1953 .m(m)
1954 .n(n)
1955 .k(k)
1956 .cm_stride(19)
1957 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001958 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001959 }
1960 }
1961 }
1962 }
1963
1964 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, a_offset) {
1965 TEST_REQUIRES_ARM_NEON_V8;
1966 for (size_t k = 1; k <= 40; k += 9) {
1967 GemmMicrokernelTester()
1968 .mr(4)
1969 .nr(16)
1970 .kr(1)
1971 .sr(1)
1972 .m(4)
1973 .n(16)
1974 .k(k)
1975 .ks(3)
1976 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08001977 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001978 }
1979 }
1980
1981 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, zero) {
1982 TEST_REQUIRES_ARM_NEON_V8;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001983 for (size_t k = 1; k <= 40; k += 9) {
1984 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan69c8a292021-07-14 19:34:56 -07001985 GemmMicrokernelTester()
1986 .mr(4)
1987 .nr(16)
1988 .kr(1)
1989 .sr(1)
1990 .m(4)
1991 .n(16)
1992 .k(k)
1993 .ks(3)
1994 .a_offset(163)
1995 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08001996 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07001997 }
1998 }
1999 }
2000
2001 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, qmin) {
2002 TEST_REQUIRES_ARM_NEON_V8;
2003 GemmMicrokernelTester()
2004 .mr(4)
2005 .nr(16)
2006 .kr(1)
2007 .sr(1)
2008 .m(4)
2009 .n(16)
2010 .k(8)
2011 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002012 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07002013 }
2014
2015 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, qmax) {
2016 TEST_REQUIRES_ARM_NEON_V8;
2017 GemmMicrokernelTester()
2018 .mr(4)
2019 .nr(16)
2020 .kr(1)
2021 .sr(1)
2022 .m(4)
2023 .n(16)
2024 .k(8)
2025 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002026 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07002027 }
2028
2029 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, strided_cm) {
2030 TEST_REQUIRES_ARM_NEON_V8;
2031 GemmMicrokernelTester()
2032 .mr(4)
2033 .nr(16)
2034 .kr(1)
2035 .sr(1)
2036 .m(4)
2037 .n(16)
2038 .k(8)
2039 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002040 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07002041 }
2042
2043 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, no_a_zero_point) {
2044 TEST_REQUIRES_ARM_NEON_V8;
2045 for (size_t k = 1; k <= 40; k += 9) {
2046 GemmMicrokernelTester()
2047 .mr(4)
2048 .nr(16)
2049 .kr(1)
2050 .sr(1)
2051 .m(4)
2052 .n(16)
2053 .k(k)
2054 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002055 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07002056 }
2057 }
2058
2059 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, no_b_zero_point) {
2060 TEST_REQUIRES_ARM_NEON_V8;
2061 for (size_t k = 1; k <= 40; k += 9) {
2062 GemmMicrokernelTester()
2063 .mr(4)
2064 .nr(16)
2065 .kr(1)
2066 .sr(1)
2067 .m(4)
2068 .n(16)
2069 .k(k)
2070 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002071 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07002072 }
2073 }
2074
2075 TEST(QU8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, no_zero_point) {
2076 TEST_REQUIRES_ARM_NEON_V8;
2077 for (size_t k = 1; k <= 40; k += 9) {
2078 GemmMicrokernelTester()
2079 .mr(4)
2080 .nr(16)
2081 .kr(1)
2082 .sr(1)
2083 .m(4)
2084 .n(16)
2085 .k(k)
2086 .a_zero_point(0)
2087 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002088 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Marat Dukhan69c8a292021-07-14 19:34:56 -07002089 }
2090 }
2091#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2092
2093
Digant Desai9982ed32021-11-24 13:03:54 -08002094#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
2095 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8) {
2096 TEST_REQUIRES_ARM_NEON_DOT;
2097 GemmMicrokernelTester()
2098 .mr(1)
2099 .nr(16)
2100 .kr(4)
2101 .sr(1)
2102 .m(1)
2103 .n(16)
2104 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08002105 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002106 }
2107
2108 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, strided_cn) {
2109 TEST_REQUIRES_ARM_NEON_DOT;
2110 GemmMicrokernelTester()
2111 .mr(1)
2112 .nr(16)
2113 .kr(4)
2114 .sr(1)
2115 .m(1)
2116 .n(16)
2117 .k(8)
2118 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002119 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002120 }
2121
2122 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8_subtile) {
2123 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002124 for (uint32_t n = 1; n <= 16; n++) {
2125 for (uint32_t m = 1; m <= 1; m++) {
Digant Desai9982ed32021-11-24 13:03:54 -08002126 GemmMicrokernelTester()
2127 .mr(1)
2128 .nr(16)
2129 .kr(4)
2130 .sr(1)
2131 .m(m)
2132 .n(n)
2133 .k(8)
2134 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002135 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002136 }
2137 }
2138 }
2139
2140 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8_subtile_m) {
2141 TEST_REQUIRES_ARM_NEON_DOT;
2142 for (uint32_t m = 1; m <= 1; m++) {
2143 GemmMicrokernelTester()
2144 .mr(1)
2145 .nr(16)
2146 .kr(4)
2147 .sr(1)
2148 .m(m)
2149 .n(16)
2150 .k(8)
2151 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002152 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002153 }
2154 }
2155
2156 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8_subtile_n) {
2157 TEST_REQUIRES_ARM_NEON_DOT;
2158 for (uint32_t n = 1; n <= 16; n++) {
2159 GemmMicrokernelTester()
2160 .mr(1)
2161 .nr(16)
2162 .kr(4)
2163 .sr(1)
2164 .m(1)
2165 .n(n)
2166 .k(8)
2167 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002168 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002169 }
2170 }
2171
2172 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_lt_8) {
2173 TEST_REQUIRES_ARM_NEON_DOT;
2174 for (size_t k = 1; k < 8; k++) {
2175 GemmMicrokernelTester()
2176 .mr(1)
2177 .nr(16)
2178 .kr(4)
2179 .sr(1)
2180 .m(1)
2181 .n(16)
2182 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002183 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002184 }
2185 }
2186
2187 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_lt_8_subtile) {
2188 TEST_REQUIRES_ARM_NEON_DOT;
2189 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002190 for (uint32_t n = 1; n <= 16; n++) {
2191 for (uint32_t m = 1; m <= 1; m++) {
Digant Desai9982ed32021-11-24 13:03:54 -08002192 GemmMicrokernelTester()
2193 .mr(1)
2194 .nr(16)
2195 .kr(4)
2196 .sr(1)
2197 .m(m)
2198 .n(n)
2199 .k(k)
2200 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002201 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002202 }
2203 }
2204 }
2205 }
2206
2207 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_gt_8) {
2208 TEST_REQUIRES_ARM_NEON_DOT;
2209 for (size_t k = 9; k < 16; k++) {
2210 GemmMicrokernelTester()
2211 .mr(1)
2212 .nr(16)
2213 .kr(4)
2214 .sr(1)
2215 .m(1)
2216 .n(16)
2217 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002218 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002219 }
2220 }
2221
2222 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_gt_8_subtile) {
2223 TEST_REQUIRES_ARM_NEON_DOT;
2224 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002225 for (uint32_t n = 1; n <= 16; n++) {
2226 for (uint32_t m = 1; m <= 1; m++) {
Digant Desai9982ed32021-11-24 13:03:54 -08002227 GemmMicrokernelTester()
2228 .mr(1)
2229 .nr(16)
2230 .kr(4)
2231 .sr(1)
2232 .m(m)
2233 .n(n)
2234 .k(k)
2235 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002236 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002237 }
2238 }
2239 }
2240 }
2241
2242 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_div_8) {
2243 TEST_REQUIRES_ARM_NEON_DOT;
2244 for (size_t k = 16; k <= 80; k += 8) {
2245 GemmMicrokernelTester()
2246 .mr(1)
2247 .nr(16)
2248 .kr(4)
2249 .sr(1)
2250 .m(1)
2251 .n(16)
2252 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002253 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002254 }
2255 }
2256
2257 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_div_8_subtile) {
2258 TEST_REQUIRES_ARM_NEON_DOT;
2259 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002260 for (uint32_t n = 1; n <= 16; n++) {
2261 for (uint32_t m = 1; m <= 1; m++) {
Digant Desai9982ed32021-11-24 13:03:54 -08002262 GemmMicrokernelTester()
2263 .mr(1)
2264 .nr(16)
2265 .kr(4)
2266 .sr(1)
2267 .m(m)
2268 .n(n)
2269 .k(k)
2270 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002271 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002272 }
2273 }
2274 }
2275 }
2276
2277 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16) {
2278 TEST_REQUIRES_ARM_NEON_DOT;
2279 for (uint32_t n = 17; n < 32; n++) {
2280 for (size_t k = 1; k <= 40; k += 9) {
2281 GemmMicrokernelTester()
2282 .mr(1)
2283 .nr(16)
2284 .kr(4)
2285 .sr(1)
2286 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002287 .n(n)
Digant Desai9982ed32021-11-24 13:03:54 -08002288 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002289 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002290 }
2291 }
2292 }
2293
2294 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16_strided_cn) {
2295 TEST_REQUIRES_ARM_NEON_DOT;
2296 for (uint32_t n = 17; n < 32; n++) {
2297 for (size_t k = 1; k <= 40; k += 9) {
2298 GemmMicrokernelTester()
2299 .mr(1)
2300 .nr(16)
2301 .kr(4)
2302 .sr(1)
2303 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002304 .n(n)
Digant Desai9982ed32021-11-24 13:03:54 -08002305 .k(k)
2306 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002307 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002308 }
2309 }
2310 }
2311
2312 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16_subtile) {
2313 TEST_REQUIRES_ARM_NEON_DOT;
2314 for (uint32_t n = 17; n < 32; n++) {
2315 for (size_t k = 1; k <= 40; k += 9) {
2316 for (uint32_t m = 1; m <= 1; m++) {
2317 GemmMicrokernelTester()
2318 .mr(1)
2319 .nr(16)
2320 .kr(4)
2321 .sr(1)
2322 .m(m)
2323 .n(n)
2324 .k(k)
2325 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002326 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002327 }
2328 }
2329 }
2330 }
2331
2332 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16) {
2333 TEST_REQUIRES_ARM_NEON_DOT;
2334 for (uint32_t n = 32; n <= 48; n += 16) {
2335 for (size_t k = 1; k <= 40; k += 9) {
2336 GemmMicrokernelTester()
2337 .mr(1)
2338 .nr(16)
2339 .kr(4)
2340 .sr(1)
2341 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002342 .n(n)
Digant Desai9982ed32021-11-24 13:03:54 -08002343 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002344 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002345 }
2346 }
2347 }
2348
2349 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16_strided_cn) {
2350 TEST_REQUIRES_ARM_NEON_DOT;
2351 for (uint32_t n = 32; n <= 48; n += 16) {
2352 for (size_t k = 1; k <= 40; k += 9) {
2353 GemmMicrokernelTester()
2354 .mr(1)
2355 .nr(16)
2356 .kr(4)
2357 .sr(1)
2358 .m(1)
2359 .n(n)
2360 .k(k)
2361 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002362 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002363 }
2364 }
2365 }
2366
2367 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16_subtile) {
2368 TEST_REQUIRES_ARM_NEON_DOT;
2369 for (uint32_t n = 32; n <= 48; n += 16) {
2370 for (size_t k = 1; k <= 40; k += 9) {
2371 for (uint32_t m = 1; m <= 1; m++) {
2372 GemmMicrokernelTester()
2373 .mr(1)
2374 .nr(16)
2375 .kr(4)
2376 .sr(1)
2377 .m(m)
2378 .n(n)
2379 .k(k)
2380 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002381 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002382 }
2383 }
2384 }
2385 }
2386
2387 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, small_kernel) {
2388 TEST_REQUIRES_ARM_NEON_DOT;
2389 for (size_t k = 1; k <= 40; k += 9) {
2390 GemmMicrokernelTester()
2391 .mr(1)
2392 .nr(16)
2393 .kr(4)
2394 .sr(1)
2395 .m(1)
2396 .n(16)
2397 .k(k)
2398 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002399 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002400 }
2401 }
2402
2403 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, small_kernel_subtile) {
2404 TEST_REQUIRES_ARM_NEON_DOT;
2405 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002406 for (uint32_t n = 1; n <= 16; n++) {
2407 for (uint32_t m = 1; m <= 1; m++) {
Digant Desai9982ed32021-11-24 13:03:54 -08002408 GemmMicrokernelTester()
2409 .mr(1)
2410 .nr(16)
2411 .kr(4)
2412 .sr(1)
2413 .m(m)
2414 .n(n)
2415 .k(k)
2416 .ks(3)
2417 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002418 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002419 }
2420 }
2421 }
2422 }
2423
2424 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16_small_kernel) {
2425 TEST_REQUIRES_ARM_NEON_DOT;
2426 for (uint32_t n = 17; n < 32; n++) {
2427 for (size_t k = 1; k <= 40; k += 9) {
2428 GemmMicrokernelTester()
2429 .mr(1)
2430 .nr(16)
2431 .kr(4)
2432 .sr(1)
2433 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002434 .n(n)
Digant Desai9982ed32021-11-24 13:03:54 -08002435 .k(k)
2436 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002437 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002438 }
2439 }
2440 }
2441
2442 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16_small_kernel) {
2443 TEST_REQUIRES_ARM_NEON_DOT;
2444 for (uint32_t n = 32; n <= 48; n += 16) {
2445 for (size_t k = 1; k <= 40; k += 9) {
2446 GemmMicrokernelTester()
2447 .mr(1)
2448 .nr(16)
2449 .kr(4)
2450 .sr(1)
2451 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002452 .n(n)
Digant Desai9982ed32021-11-24 13:03:54 -08002453 .k(k)
2454 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002455 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002456 }
2457 }
2458 }
2459
2460 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, strided_cm_subtile) {
2461 TEST_REQUIRES_ARM_NEON_DOT;
2462 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002463 for (uint32_t n = 1; n <= 16; n++) {
2464 for (uint32_t m = 1; m <= 1; m++) {
Digant Desai9982ed32021-11-24 13:03:54 -08002465 GemmMicrokernelTester()
2466 .mr(1)
2467 .nr(16)
2468 .kr(4)
2469 .sr(1)
2470 .m(m)
2471 .n(n)
2472 .k(k)
2473 .cm_stride(19)
2474 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002475 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002476 }
2477 }
2478 }
2479 }
2480
2481 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, a_offset) {
2482 TEST_REQUIRES_ARM_NEON_DOT;
2483 for (size_t k = 1; k <= 40; k += 9) {
2484 GemmMicrokernelTester()
2485 .mr(1)
2486 .nr(16)
2487 .kr(4)
2488 .sr(1)
2489 .m(1)
2490 .n(16)
2491 .k(k)
2492 .ks(3)
2493 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08002494 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002495 }
2496 }
2497
2498 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, zero) {
2499 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002500 for (size_t k = 1; k <= 40; k += 9) {
2501 for (uint32_t mz = 0; mz < 1; mz++) {
Digant Desai9982ed32021-11-24 13:03:54 -08002502 GemmMicrokernelTester()
2503 .mr(1)
2504 .nr(16)
2505 .kr(4)
2506 .sr(1)
2507 .m(1)
2508 .n(16)
2509 .k(k)
2510 .ks(3)
2511 .a_offset(43)
2512 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08002513 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002514 }
2515 }
2516 }
2517
2518 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, qmin) {
2519 TEST_REQUIRES_ARM_NEON_DOT;
2520 GemmMicrokernelTester()
2521 .mr(1)
2522 .nr(16)
2523 .kr(4)
2524 .sr(1)
2525 .m(1)
2526 .n(16)
2527 .k(8)
2528 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002529 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002530 }
2531
2532 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, qmax) {
2533 TEST_REQUIRES_ARM_NEON_DOT;
2534 GemmMicrokernelTester()
2535 .mr(1)
2536 .nr(16)
2537 .kr(4)
2538 .sr(1)
2539 .m(1)
2540 .n(16)
2541 .k(8)
2542 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002543 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002544 }
2545
2546 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, strided_cm) {
2547 TEST_REQUIRES_ARM_NEON_DOT;
2548 GemmMicrokernelTester()
2549 .mr(1)
2550 .nr(16)
2551 .kr(4)
2552 .sr(1)
2553 .m(1)
2554 .n(16)
2555 .k(8)
2556 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002557 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002558 }
2559
2560 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, no_a_zero_point) {
2561 TEST_REQUIRES_ARM_NEON_DOT;
2562 for (size_t k = 1; k <= 40; k += 9) {
2563 GemmMicrokernelTester()
2564 .mr(1)
2565 .nr(16)
2566 .kr(4)
2567 .sr(1)
2568 .m(1)
2569 .n(16)
2570 .k(k)
2571 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002572 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002573 }
2574 }
2575
2576 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, no_b_zero_point) {
2577 TEST_REQUIRES_ARM_NEON_DOT;
2578 for (size_t k = 1; k <= 40; k += 9) {
2579 GemmMicrokernelTester()
2580 .mr(1)
2581 .nr(16)
2582 .kr(4)
2583 .sr(1)
2584 .m(1)
2585 .n(16)
2586 .k(k)
2587 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002588 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002589 }
2590 }
2591
2592 TEST(QU8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, no_zero_point) {
2593 TEST_REQUIRES_ARM_NEON_DOT;
2594 for (size_t k = 1; k <= 40; k += 9) {
2595 GemmMicrokernelTester()
2596 .mr(1)
2597 .nr(16)
2598 .kr(4)
2599 .sr(1)
2600 .m(1)
2601 .n(16)
2602 .k(k)
2603 .a_zero_point(0)
2604 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002605 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_fp32_neonv8_params, xnn_qu8_requantize_fp32);
Digant Desai9982ed32021-11-24 13:03:54 -08002606 }
2607 }
2608#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
2609
2610
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002611#if XNN_ARCH_X86 || XNN_ARCH_X86_64
2612 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8) {
2613 TEST_REQUIRES_X86_SSE2;
2614 GemmMicrokernelTester()
2615 .mr(3)
2616 .nr(4)
2617 .kr(2)
2618 .sr(1)
2619 .m(3)
2620 .n(4)
2621 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08002622 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002623 }
2624
2625 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cn) {
2626 TEST_REQUIRES_X86_SSE2;
2627 GemmMicrokernelTester()
2628 .mr(3)
2629 .nr(4)
2630 .kr(2)
2631 .sr(1)
2632 .m(3)
2633 .n(4)
2634 .k(8)
2635 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08002636 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002637 }
2638
2639 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile) {
2640 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002641 for (uint32_t n = 1; n <= 4; n++) {
2642 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002643 GemmMicrokernelTester()
2644 .mr(3)
2645 .nr(4)
2646 .kr(2)
2647 .sr(1)
2648 .m(m)
2649 .n(n)
2650 .k(8)
2651 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002652 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002653 }
2654 }
2655 }
2656
2657 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile_m) {
2658 TEST_REQUIRES_X86_SSE2;
2659 for (uint32_t m = 1; m <= 3; m++) {
2660 GemmMicrokernelTester()
2661 .mr(3)
2662 .nr(4)
2663 .kr(2)
2664 .sr(1)
2665 .m(m)
2666 .n(4)
2667 .k(8)
2668 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002669 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002670 }
2671 }
2672
2673 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile_n) {
2674 TEST_REQUIRES_X86_SSE2;
2675 for (uint32_t n = 1; n <= 4; n++) {
2676 GemmMicrokernelTester()
2677 .mr(3)
2678 .nr(4)
2679 .kr(2)
2680 .sr(1)
2681 .m(3)
2682 .n(n)
2683 .k(8)
2684 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002685 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002686 }
2687 }
2688
2689 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_lt_8) {
2690 TEST_REQUIRES_X86_SSE2;
2691 for (size_t k = 1; k < 8; k++) {
2692 GemmMicrokernelTester()
2693 .mr(3)
2694 .nr(4)
2695 .kr(2)
2696 .sr(1)
2697 .m(3)
2698 .n(4)
2699 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002700 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002701 }
2702 }
2703
2704 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_lt_8_subtile) {
2705 TEST_REQUIRES_X86_SSE2;
2706 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002707 for (uint32_t n = 1; n <= 4; n++) {
2708 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002709 GemmMicrokernelTester()
2710 .mr(3)
2711 .nr(4)
2712 .kr(2)
2713 .sr(1)
2714 .m(m)
2715 .n(n)
2716 .k(k)
2717 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002718 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002719 }
2720 }
2721 }
2722 }
2723
2724 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_gt_8) {
2725 TEST_REQUIRES_X86_SSE2;
2726 for (size_t k = 9; k < 16; k++) {
2727 GemmMicrokernelTester()
2728 .mr(3)
2729 .nr(4)
2730 .kr(2)
2731 .sr(1)
2732 .m(3)
2733 .n(4)
2734 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002735 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002736 }
2737 }
2738
2739 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_gt_8_subtile) {
2740 TEST_REQUIRES_X86_SSE2;
2741 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002742 for (uint32_t n = 1; n <= 4; n++) {
2743 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002744 GemmMicrokernelTester()
2745 .mr(3)
2746 .nr(4)
2747 .kr(2)
2748 .sr(1)
2749 .m(m)
2750 .n(n)
2751 .k(k)
2752 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002753 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002754 }
2755 }
2756 }
2757 }
2758
2759 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_div_8) {
2760 TEST_REQUIRES_X86_SSE2;
2761 for (size_t k = 16; k <= 80; k += 8) {
2762 GemmMicrokernelTester()
2763 .mr(3)
2764 .nr(4)
2765 .kr(2)
2766 .sr(1)
2767 .m(3)
2768 .n(4)
2769 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002770 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002771 }
2772 }
2773
2774 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_div_8_subtile) {
2775 TEST_REQUIRES_X86_SSE2;
2776 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002777 for (uint32_t n = 1; n <= 4; n++) {
2778 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002779 GemmMicrokernelTester()
2780 .mr(3)
2781 .nr(4)
2782 .kr(2)
2783 .sr(1)
2784 .m(m)
2785 .n(n)
2786 .k(k)
2787 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002788 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002789 }
2790 }
2791 }
2792 }
2793
2794 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4) {
2795 TEST_REQUIRES_X86_SSE2;
2796 for (uint32_t n = 5; n < 8; n++) {
2797 for (size_t k = 1; k <= 40; k += 9) {
2798 GemmMicrokernelTester()
2799 .mr(3)
2800 .nr(4)
2801 .kr(2)
2802 .sr(1)
2803 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002804 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002805 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002806 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002807 }
2808 }
2809 }
2810
2811 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_strided_cn) {
2812 TEST_REQUIRES_X86_SSE2;
2813 for (uint32_t n = 5; n < 8; n++) {
2814 for (size_t k = 1; k <= 40; k += 9) {
2815 GemmMicrokernelTester()
2816 .mr(3)
2817 .nr(4)
2818 .kr(2)
2819 .sr(1)
2820 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002821 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002822 .k(k)
2823 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08002824 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002825 }
2826 }
2827 }
2828
2829 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_subtile) {
2830 TEST_REQUIRES_X86_SSE2;
2831 for (uint32_t n = 5; n < 8; n++) {
2832 for (size_t k = 1; k <= 40; k += 9) {
2833 for (uint32_t m = 1; m <= 3; m++) {
2834 GemmMicrokernelTester()
2835 .mr(3)
2836 .nr(4)
2837 .kr(2)
2838 .sr(1)
2839 .m(m)
2840 .n(n)
2841 .k(k)
2842 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002843 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002844 }
2845 }
2846 }
2847 }
2848
2849 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4) {
2850 TEST_REQUIRES_X86_SSE2;
2851 for (uint32_t n = 8; n <= 12; n += 4) {
2852 for (size_t k = 1; k <= 40; k += 9) {
2853 GemmMicrokernelTester()
2854 .mr(3)
2855 .nr(4)
2856 .kr(2)
2857 .sr(1)
2858 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002859 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002860 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002861 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002862 }
2863 }
2864 }
2865
2866 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_strided_cn) {
2867 TEST_REQUIRES_X86_SSE2;
2868 for (uint32_t n = 8; n <= 12; n += 4) {
2869 for (size_t k = 1; k <= 40; k += 9) {
2870 GemmMicrokernelTester()
2871 .mr(3)
2872 .nr(4)
2873 .kr(2)
2874 .sr(1)
2875 .m(3)
2876 .n(n)
2877 .k(k)
2878 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08002879 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002880 }
2881 }
2882 }
2883
2884 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_subtile) {
2885 TEST_REQUIRES_X86_SSE2;
2886 for (uint32_t n = 8; n <= 12; n += 4) {
2887 for (size_t k = 1; k <= 40; k += 9) {
2888 for (uint32_t m = 1; m <= 3; m++) {
2889 GemmMicrokernelTester()
2890 .mr(3)
2891 .nr(4)
2892 .kr(2)
2893 .sr(1)
2894 .m(m)
2895 .n(n)
2896 .k(k)
2897 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002898 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002899 }
2900 }
2901 }
2902 }
2903
2904 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, small_kernel) {
2905 TEST_REQUIRES_X86_SSE2;
2906 for (size_t k = 1; k <= 40; k += 9) {
2907 GemmMicrokernelTester()
2908 .mr(3)
2909 .nr(4)
2910 .kr(2)
2911 .sr(1)
2912 .m(3)
2913 .n(4)
2914 .k(k)
2915 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002916 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002917 }
2918 }
2919
2920 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, small_kernel_subtile) {
2921 TEST_REQUIRES_X86_SSE2;
2922 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002923 for (uint32_t n = 1; n <= 4; n++) {
2924 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002925 GemmMicrokernelTester()
2926 .mr(3)
2927 .nr(4)
2928 .kr(2)
2929 .sr(1)
2930 .m(m)
2931 .n(n)
2932 .k(k)
2933 .ks(3)
2934 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002935 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002936 }
2937 }
2938 }
2939 }
2940
2941 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_small_kernel) {
2942 TEST_REQUIRES_X86_SSE2;
2943 for (uint32_t n = 5; n < 8; n++) {
2944 for (size_t k = 1; k <= 40; k += 9) {
2945 GemmMicrokernelTester()
2946 .mr(3)
2947 .nr(4)
2948 .kr(2)
2949 .sr(1)
2950 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002951 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002952 .k(k)
2953 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002954 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002955 }
2956 }
2957 }
2958
2959 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_small_kernel) {
2960 TEST_REQUIRES_X86_SSE2;
2961 for (uint32_t n = 8; n <= 12; n += 4) {
2962 for (size_t k = 1; k <= 40; k += 9) {
2963 GemmMicrokernelTester()
2964 .mr(3)
2965 .nr(4)
2966 .kr(2)
2967 .sr(1)
2968 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002969 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002970 .k(k)
2971 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002972 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002973 }
2974 }
2975 }
2976
2977 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cm_subtile) {
2978 TEST_REQUIRES_X86_SSE2;
2979 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002980 for (uint32_t n = 1; n <= 4; n++) {
2981 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002982 GemmMicrokernelTester()
2983 .mr(3)
2984 .nr(4)
2985 .kr(2)
2986 .sr(1)
2987 .m(m)
2988 .n(n)
2989 .k(k)
2990 .cm_stride(7)
2991 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002992 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07002993 }
2994 }
2995 }
2996 }
2997
2998 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, a_offset) {
2999 TEST_REQUIRES_X86_SSE2;
3000 for (size_t k = 1; k <= 40; k += 9) {
3001 GemmMicrokernelTester()
3002 .mr(3)
3003 .nr(4)
3004 .kr(2)
3005 .sr(1)
3006 .m(3)
3007 .n(4)
3008 .k(k)
3009 .ks(3)
3010 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08003011 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003012 }
3013 }
3014
3015 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, zero) {
3016 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003017 for (size_t k = 1; k <= 40; k += 9) {
3018 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003019 GemmMicrokernelTester()
3020 .mr(3)
3021 .nr(4)
3022 .kr(2)
3023 .sr(1)
3024 .m(3)
3025 .n(4)
3026 .k(k)
3027 .ks(3)
3028 .a_offset(127)
3029 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08003030 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003031 }
3032 }
3033 }
3034
3035 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, qmin) {
3036 TEST_REQUIRES_X86_SSE2;
3037 GemmMicrokernelTester()
3038 .mr(3)
3039 .nr(4)
3040 .kr(2)
3041 .sr(1)
3042 .m(3)
3043 .n(4)
3044 .k(8)
3045 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003046 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003047 }
3048
3049 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, qmax) {
3050 TEST_REQUIRES_X86_SSE2;
3051 GemmMicrokernelTester()
3052 .mr(3)
3053 .nr(4)
3054 .kr(2)
3055 .sr(1)
3056 .m(3)
3057 .n(4)
3058 .k(8)
3059 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003060 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003061 }
3062
3063 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cm) {
3064 TEST_REQUIRES_X86_SSE2;
3065 GemmMicrokernelTester()
3066 .mr(3)
3067 .nr(4)
3068 .kr(2)
3069 .sr(1)
3070 .m(3)
3071 .n(4)
3072 .k(8)
3073 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08003074 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003075 }
3076
3077 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, no_a_zero_point) {
3078 TEST_REQUIRES_X86_SSE2;
3079 for (size_t k = 1; k <= 40; k += 9) {
3080 GemmMicrokernelTester()
3081 .mr(3)
3082 .nr(4)
3083 .kr(2)
3084 .sr(1)
3085 .m(3)
3086 .n(4)
3087 .k(k)
3088 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003089 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003090 }
3091 }
3092
3093 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, no_b_zero_point) {
3094 TEST_REQUIRES_X86_SSE2;
3095 for (size_t k = 1; k <= 40; k += 9) {
3096 GemmMicrokernelTester()
3097 .mr(3)
3098 .nr(4)
3099 .kr(2)
3100 .sr(1)
3101 .m(3)
3102 .n(4)
3103 .k(k)
3104 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003105 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003106 }
3107 }
3108
3109 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, no_zero_point) {
3110 TEST_REQUIRES_X86_SSE2;
3111 for (size_t k = 1; k <= 40; k += 9) {
3112 GemmMicrokernelTester()
3113 .mr(3)
3114 .nr(4)
3115 .kr(2)
3116 .sr(1)
3117 .m(3)
3118 .n(4)
3119 .k(k)
3120 .a_zero_point(0)
3121 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003122 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003123 }
3124 }
3125#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3126
3127
3128#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003129 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8) {
3130 TEST_REQUIRES_X86_SSE41;
3131 GemmMicrokernelTester()
3132 .mr(1)
3133 .nr(4)
3134 .kr(2)
3135 .sr(1)
3136 .m(1)
3137 .n(4)
3138 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08003139 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003140 }
3141
3142 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, strided_cn) {
3143 TEST_REQUIRES_X86_SSE41;
3144 GemmMicrokernelTester()
3145 .mr(1)
3146 .nr(4)
3147 .kr(2)
3148 .sr(1)
3149 .m(1)
3150 .n(4)
3151 .k(8)
3152 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08003153 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003154 }
3155
3156 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_subtile) {
3157 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003158 for (uint32_t n = 1; n <= 4; n++) {
3159 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003160 GemmMicrokernelTester()
3161 .mr(1)
3162 .nr(4)
3163 .kr(2)
3164 .sr(1)
3165 .m(m)
3166 .n(n)
3167 .k(8)
3168 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003169 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003170 }
3171 }
3172 }
3173
3174 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_subtile_m) {
3175 TEST_REQUIRES_X86_SSE41;
3176 for (uint32_t m = 1; m <= 1; m++) {
3177 GemmMicrokernelTester()
3178 .mr(1)
3179 .nr(4)
3180 .kr(2)
3181 .sr(1)
3182 .m(m)
3183 .n(4)
3184 .k(8)
3185 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003186 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003187 }
3188 }
3189
3190 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_subtile_n) {
3191 TEST_REQUIRES_X86_SSE41;
3192 for (uint32_t n = 1; n <= 4; n++) {
3193 GemmMicrokernelTester()
3194 .mr(1)
3195 .nr(4)
3196 .kr(2)
3197 .sr(1)
3198 .m(1)
3199 .n(n)
3200 .k(8)
3201 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003202 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003203 }
3204 }
3205
3206 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_lt_8) {
3207 TEST_REQUIRES_X86_SSE41;
3208 for (size_t k = 1; k < 8; k++) {
3209 GemmMicrokernelTester()
3210 .mr(1)
3211 .nr(4)
3212 .kr(2)
3213 .sr(1)
3214 .m(1)
3215 .n(4)
3216 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003217 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003218 }
3219 }
3220
3221 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_lt_8_subtile) {
3222 TEST_REQUIRES_X86_SSE41;
3223 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003224 for (uint32_t n = 1; n <= 4; n++) {
3225 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003226 GemmMicrokernelTester()
3227 .mr(1)
3228 .nr(4)
3229 .kr(2)
3230 .sr(1)
3231 .m(m)
3232 .n(n)
3233 .k(k)
3234 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003235 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003236 }
3237 }
3238 }
3239 }
3240
3241 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_gt_8) {
3242 TEST_REQUIRES_X86_SSE41;
3243 for (size_t k = 9; k < 16; k++) {
3244 GemmMicrokernelTester()
3245 .mr(1)
3246 .nr(4)
3247 .kr(2)
3248 .sr(1)
3249 .m(1)
3250 .n(4)
3251 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003252 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003253 }
3254 }
3255
3256 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_gt_8_subtile) {
3257 TEST_REQUIRES_X86_SSE41;
3258 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003259 for (uint32_t n = 1; n <= 4; n++) {
3260 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003261 GemmMicrokernelTester()
3262 .mr(1)
3263 .nr(4)
3264 .kr(2)
3265 .sr(1)
3266 .m(m)
3267 .n(n)
3268 .k(k)
3269 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003270 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003271 }
3272 }
3273 }
3274 }
3275
3276 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_div_8) {
3277 TEST_REQUIRES_X86_SSE41;
3278 for (size_t k = 16; k <= 80; k += 8) {
3279 GemmMicrokernelTester()
3280 .mr(1)
3281 .nr(4)
3282 .kr(2)
3283 .sr(1)
3284 .m(1)
3285 .n(4)
3286 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003287 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003288 }
3289 }
3290
3291 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_div_8_subtile) {
3292 TEST_REQUIRES_X86_SSE41;
3293 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003294 for (uint32_t n = 1; n <= 4; n++) {
3295 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003296 GemmMicrokernelTester()
3297 .mr(1)
3298 .nr(4)
3299 .kr(2)
3300 .sr(1)
3301 .m(m)
3302 .n(n)
3303 .k(k)
3304 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003305 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003306 }
3307 }
3308 }
3309 }
3310
3311 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4) {
3312 TEST_REQUIRES_X86_SSE41;
3313 for (uint32_t n = 5; n < 8; n++) {
3314 for (size_t k = 1; k <= 40; k += 9) {
3315 GemmMicrokernelTester()
3316 .mr(1)
3317 .nr(4)
3318 .kr(2)
3319 .sr(1)
3320 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003321 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003322 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003323 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003324 }
3325 }
3326 }
3327
3328 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4_strided_cn) {
3329 TEST_REQUIRES_X86_SSE41;
3330 for (uint32_t n = 5; n < 8; n++) {
3331 for (size_t k = 1; k <= 40; k += 9) {
3332 GemmMicrokernelTester()
3333 .mr(1)
3334 .nr(4)
3335 .kr(2)
3336 .sr(1)
3337 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003338 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003339 .k(k)
3340 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08003341 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003342 }
3343 }
3344 }
3345
3346 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4_subtile) {
3347 TEST_REQUIRES_X86_SSE41;
3348 for (uint32_t n = 5; n < 8; n++) {
3349 for (size_t k = 1; k <= 40; k += 9) {
3350 for (uint32_t m = 1; m <= 1; m++) {
3351 GemmMicrokernelTester()
3352 .mr(1)
3353 .nr(4)
3354 .kr(2)
3355 .sr(1)
3356 .m(m)
3357 .n(n)
3358 .k(k)
3359 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003360 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003361 }
3362 }
3363 }
3364 }
3365
3366 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4) {
3367 TEST_REQUIRES_X86_SSE41;
3368 for (uint32_t n = 8; n <= 12; n += 4) {
3369 for (size_t k = 1; k <= 40; k += 9) {
3370 GemmMicrokernelTester()
3371 .mr(1)
3372 .nr(4)
3373 .kr(2)
3374 .sr(1)
3375 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003376 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003377 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003378 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003379 }
3380 }
3381 }
3382
3383 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4_strided_cn) {
3384 TEST_REQUIRES_X86_SSE41;
3385 for (uint32_t n = 8; n <= 12; n += 4) {
3386 for (size_t k = 1; k <= 40; k += 9) {
3387 GemmMicrokernelTester()
3388 .mr(1)
3389 .nr(4)
3390 .kr(2)
3391 .sr(1)
3392 .m(1)
3393 .n(n)
3394 .k(k)
3395 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08003396 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003397 }
3398 }
3399 }
3400
3401 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4_subtile) {
3402 TEST_REQUIRES_X86_SSE41;
3403 for (uint32_t n = 8; n <= 12; n += 4) {
3404 for (size_t k = 1; k <= 40; k += 9) {
3405 for (uint32_t m = 1; m <= 1; m++) {
3406 GemmMicrokernelTester()
3407 .mr(1)
3408 .nr(4)
3409 .kr(2)
3410 .sr(1)
3411 .m(m)
3412 .n(n)
3413 .k(k)
3414 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003415 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003416 }
3417 }
3418 }
3419 }
3420
3421 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, small_kernel) {
3422 TEST_REQUIRES_X86_SSE41;
3423 for (size_t k = 1; k <= 40; k += 9) {
3424 GemmMicrokernelTester()
3425 .mr(1)
3426 .nr(4)
3427 .kr(2)
3428 .sr(1)
3429 .m(1)
3430 .n(4)
3431 .k(k)
3432 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003433 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003434 }
3435 }
3436
3437 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, small_kernel_subtile) {
3438 TEST_REQUIRES_X86_SSE41;
3439 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003440 for (uint32_t n = 1; n <= 4; n++) {
3441 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003442 GemmMicrokernelTester()
3443 .mr(1)
3444 .nr(4)
3445 .kr(2)
3446 .sr(1)
3447 .m(m)
3448 .n(n)
3449 .k(k)
3450 .ks(3)
3451 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003452 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003453 }
3454 }
3455 }
3456 }
3457
3458 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4_small_kernel) {
3459 TEST_REQUIRES_X86_SSE41;
3460 for (uint32_t n = 5; n < 8; n++) {
3461 for (size_t k = 1; k <= 40; k += 9) {
3462 GemmMicrokernelTester()
3463 .mr(1)
3464 .nr(4)
3465 .kr(2)
3466 .sr(1)
3467 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003468 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003469 .k(k)
3470 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003471 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003472 }
3473 }
3474 }
3475
3476 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4_small_kernel) {
3477 TEST_REQUIRES_X86_SSE41;
3478 for (uint32_t n = 8; n <= 12; n += 4) {
3479 for (size_t k = 1; k <= 40; k += 9) {
3480 GemmMicrokernelTester()
3481 .mr(1)
3482 .nr(4)
3483 .kr(2)
3484 .sr(1)
3485 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003486 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003487 .k(k)
3488 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003489 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003490 }
3491 }
3492 }
3493
3494 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, strided_cm_subtile) {
3495 TEST_REQUIRES_X86_SSE41;
3496 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003497 for (uint32_t n = 1; n <= 4; n++) {
3498 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003499 GemmMicrokernelTester()
3500 .mr(1)
3501 .nr(4)
3502 .kr(2)
3503 .sr(1)
3504 .m(m)
3505 .n(n)
3506 .k(k)
3507 .cm_stride(7)
3508 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003509 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003510 }
3511 }
3512 }
3513 }
3514
3515 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, a_offset) {
3516 TEST_REQUIRES_X86_SSE41;
3517 for (size_t k = 1; k <= 40; k += 9) {
3518 GemmMicrokernelTester()
3519 .mr(1)
3520 .nr(4)
3521 .kr(2)
3522 .sr(1)
3523 .m(1)
3524 .n(4)
3525 .k(k)
3526 .ks(3)
3527 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08003528 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003529 }
3530 }
3531
3532 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, zero) {
3533 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003534 for (size_t k = 1; k <= 40; k += 9) {
3535 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003536 GemmMicrokernelTester()
3537 .mr(1)
3538 .nr(4)
3539 .kr(2)
3540 .sr(1)
3541 .m(1)
3542 .n(4)
3543 .k(k)
3544 .ks(3)
3545 .a_offset(43)
3546 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08003547 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003548 }
3549 }
3550 }
3551
3552 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, qmin) {
3553 TEST_REQUIRES_X86_SSE41;
3554 GemmMicrokernelTester()
3555 .mr(1)
3556 .nr(4)
3557 .kr(2)
3558 .sr(1)
3559 .m(1)
3560 .n(4)
3561 .k(8)
3562 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003563 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003564 }
3565
3566 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, qmax) {
3567 TEST_REQUIRES_X86_SSE41;
3568 GemmMicrokernelTester()
3569 .mr(1)
3570 .nr(4)
3571 .kr(2)
3572 .sr(1)
3573 .m(1)
3574 .n(4)
3575 .k(8)
3576 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003577 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003578 }
3579
3580 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, strided_cm) {
3581 TEST_REQUIRES_X86_SSE41;
3582 GemmMicrokernelTester()
3583 .mr(1)
3584 .nr(4)
3585 .kr(2)
3586 .sr(1)
3587 .m(1)
3588 .n(4)
3589 .k(8)
3590 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08003591 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003592 }
3593
3594 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, no_a_zero_point) {
3595 TEST_REQUIRES_X86_SSE41;
3596 for (size_t k = 1; k <= 40; k += 9) {
3597 GemmMicrokernelTester()
3598 .mr(1)
3599 .nr(4)
3600 .kr(2)
3601 .sr(1)
3602 .m(1)
3603 .n(4)
3604 .k(k)
3605 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003606 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003607 }
3608 }
3609
3610 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, no_b_zero_point) {
3611 TEST_REQUIRES_X86_SSE41;
3612 for (size_t k = 1; k <= 40; k += 9) {
3613 GemmMicrokernelTester()
3614 .mr(1)
3615 .nr(4)
3616 .kr(2)
3617 .sr(1)
3618 .m(1)
3619 .n(4)
3620 .k(k)
3621 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003622 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003623 }
3624 }
3625
3626 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, no_zero_point) {
3627 TEST_REQUIRES_X86_SSE41;
3628 for (size_t k = 1; k <= 40; k += 9) {
3629 GemmMicrokernelTester()
3630 .mr(1)
3631 .nr(4)
3632 .kr(2)
3633 .sr(1)
3634 .m(1)
3635 .n(4)
3636 .k(k)
3637 .a_zero_point(0)
3638 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003639 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003640 }
3641 }
3642#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3643
3644
3645#if XNN_ARCH_X86 || XNN_ARCH_X86_64
3646 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8) {
3647 TEST_REQUIRES_X86_SSE41;
3648 GemmMicrokernelTester()
3649 .mr(2)
3650 .nr(4)
3651 .kr(2)
3652 .sr(1)
3653 .m(2)
3654 .n(4)
3655 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08003656 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003657 }
3658
3659 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, strided_cn) {
3660 TEST_REQUIRES_X86_SSE41;
3661 GemmMicrokernelTester()
3662 .mr(2)
3663 .nr(4)
3664 .kr(2)
3665 .sr(1)
3666 .m(2)
3667 .n(4)
3668 .k(8)
3669 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08003670 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003671 }
3672
3673 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8_subtile) {
3674 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003675 for (uint32_t n = 1; n <= 4; n++) {
3676 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003677 GemmMicrokernelTester()
3678 .mr(2)
3679 .nr(4)
3680 .kr(2)
3681 .sr(1)
3682 .m(m)
3683 .n(n)
3684 .k(8)
3685 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003686 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003687 }
3688 }
3689 }
3690
3691 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8_subtile_m) {
3692 TEST_REQUIRES_X86_SSE41;
3693 for (uint32_t m = 1; m <= 2; m++) {
3694 GemmMicrokernelTester()
3695 .mr(2)
3696 .nr(4)
3697 .kr(2)
3698 .sr(1)
3699 .m(m)
3700 .n(4)
3701 .k(8)
3702 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003703 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003704 }
3705 }
3706
3707 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8_subtile_n) {
3708 TEST_REQUIRES_X86_SSE41;
3709 for (uint32_t n = 1; n <= 4; n++) {
3710 GemmMicrokernelTester()
3711 .mr(2)
3712 .nr(4)
3713 .kr(2)
3714 .sr(1)
3715 .m(2)
3716 .n(n)
3717 .k(8)
3718 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003719 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003720 }
3721 }
3722
3723 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_lt_8) {
3724 TEST_REQUIRES_X86_SSE41;
3725 for (size_t k = 1; k < 8; k++) {
3726 GemmMicrokernelTester()
3727 .mr(2)
3728 .nr(4)
3729 .kr(2)
3730 .sr(1)
3731 .m(2)
3732 .n(4)
3733 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003734 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003735 }
3736 }
3737
3738 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_lt_8_subtile) {
3739 TEST_REQUIRES_X86_SSE41;
3740 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003741 for (uint32_t n = 1; n <= 4; n++) {
3742 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003743 GemmMicrokernelTester()
3744 .mr(2)
3745 .nr(4)
3746 .kr(2)
3747 .sr(1)
3748 .m(m)
3749 .n(n)
3750 .k(k)
3751 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003752 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003753 }
3754 }
3755 }
3756 }
3757
3758 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_gt_8) {
3759 TEST_REQUIRES_X86_SSE41;
3760 for (size_t k = 9; k < 16; k++) {
3761 GemmMicrokernelTester()
3762 .mr(2)
3763 .nr(4)
3764 .kr(2)
3765 .sr(1)
3766 .m(2)
3767 .n(4)
3768 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003769 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003770 }
3771 }
3772
3773 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_gt_8_subtile) {
3774 TEST_REQUIRES_X86_SSE41;
3775 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003776 for (uint32_t n = 1; n <= 4; n++) {
3777 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003778 GemmMicrokernelTester()
3779 .mr(2)
3780 .nr(4)
3781 .kr(2)
3782 .sr(1)
3783 .m(m)
3784 .n(n)
3785 .k(k)
3786 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003787 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003788 }
3789 }
3790 }
3791 }
3792
3793 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_div_8) {
3794 TEST_REQUIRES_X86_SSE41;
3795 for (size_t k = 16; k <= 80; k += 8) {
3796 GemmMicrokernelTester()
3797 .mr(2)
3798 .nr(4)
3799 .kr(2)
3800 .sr(1)
3801 .m(2)
3802 .n(4)
3803 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003804 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003805 }
3806 }
3807
3808 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_div_8_subtile) {
3809 TEST_REQUIRES_X86_SSE41;
3810 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003811 for (uint32_t n = 1; n <= 4; n++) {
3812 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003813 GemmMicrokernelTester()
3814 .mr(2)
3815 .nr(4)
3816 .kr(2)
3817 .sr(1)
3818 .m(m)
3819 .n(n)
3820 .k(k)
3821 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003822 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003823 }
3824 }
3825 }
3826 }
3827
3828 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4) {
3829 TEST_REQUIRES_X86_SSE41;
3830 for (uint32_t n = 5; n < 8; n++) {
3831 for (size_t k = 1; k <= 40; k += 9) {
3832 GemmMicrokernelTester()
3833 .mr(2)
3834 .nr(4)
3835 .kr(2)
3836 .sr(1)
3837 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003838 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003839 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003840 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003841 }
3842 }
3843 }
3844
3845 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4_strided_cn) {
3846 TEST_REQUIRES_X86_SSE41;
3847 for (uint32_t n = 5; n < 8; n++) {
3848 for (size_t k = 1; k <= 40; k += 9) {
3849 GemmMicrokernelTester()
3850 .mr(2)
3851 .nr(4)
3852 .kr(2)
3853 .sr(1)
3854 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003855 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003856 .k(k)
3857 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08003858 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003859 }
3860 }
3861 }
3862
3863 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4_subtile) {
3864 TEST_REQUIRES_X86_SSE41;
3865 for (uint32_t n = 5; n < 8; n++) {
3866 for (size_t k = 1; k <= 40; k += 9) {
3867 for (uint32_t m = 1; m <= 2; m++) {
3868 GemmMicrokernelTester()
3869 .mr(2)
3870 .nr(4)
3871 .kr(2)
3872 .sr(1)
3873 .m(m)
3874 .n(n)
3875 .k(k)
3876 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003877 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003878 }
3879 }
3880 }
3881 }
3882
3883 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4) {
3884 TEST_REQUIRES_X86_SSE41;
3885 for (uint32_t n = 8; n <= 12; n += 4) {
3886 for (size_t k = 1; k <= 40; k += 9) {
3887 GemmMicrokernelTester()
3888 .mr(2)
3889 .nr(4)
3890 .kr(2)
3891 .sr(1)
3892 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003893 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003894 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003895 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003896 }
3897 }
3898 }
3899
3900 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4_strided_cn) {
3901 TEST_REQUIRES_X86_SSE41;
3902 for (uint32_t n = 8; n <= 12; n += 4) {
3903 for (size_t k = 1; k <= 40; k += 9) {
3904 GemmMicrokernelTester()
3905 .mr(2)
3906 .nr(4)
3907 .kr(2)
3908 .sr(1)
3909 .m(2)
3910 .n(n)
3911 .k(k)
3912 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08003913 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003914 }
3915 }
3916 }
3917
3918 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4_subtile) {
3919 TEST_REQUIRES_X86_SSE41;
3920 for (uint32_t n = 8; n <= 12; n += 4) {
3921 for (size_t k = 1; k <= 40; k += 9) {
3922 for (uint32_t m = 1; m <= 2; m++) {
3923 GemmMicrokernelTester()
3924 .mr(2)
3925 .nr(4)
3926 .kr(2)
3927 .sr(1)
3928 .m(m)
3929 .n(n)
3930 .k(k)
3931 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003932 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003933 }
3934 }
3935 }
3936 }
3937
3938 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, small_kernel) {
3939 TEST_REQUIRES_X86_SSE41;
3940 for (size_t k = 1; k <= 40; k += 9) {
3941 GemmMicrokernelTester()
3942 .mr(2)
3943 .nr(4)
3944 .kr(2)
3945 .sr(1)
3946 .m(2)
3947 .n(4)
3948 .k(k)
3949 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003950 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003951 }
3952 }
3953
3954 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, small_kernel_subtile) {
3955 TEST_REQUIRES_X86_SSE41;
3956 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003957 for (uint32_t n = 1; n <= 4; n++) {
3958 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003959 GemmMicrokernelTester()
3960 .mr(2)
3961 .nr(4)
3962 .kr(2)
3963 .sr(1)
3964 .m(m)
3965 .n(n)
3966 .k(k)
3967 .ks(3)
3968 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003969 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003970 }
3971 }
3972 }
3973 }
3974
3975 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4_small_kernel) {
3976 TEST_REQUIRES_X86_SSE41;
3977 for (uint32_t n = 5; n < 8; n++) {
3978 for (size_t k = 1; k <= 40; k += 9) {
3979 GemmMicrokernelTester()
3980 .mr(2)
3981 .nr(4)
3982 .kr(2)
3983 .sr(1)
3984 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003985 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003986 .k(k)
3987 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003988 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07003989 }
3990 }
3991 }
3992
3993 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4_small_kernel) {
3994 TEST_REQUIRES_X86_SSE41;
3995 for (uint32_t n = 8; n <= 12; n += 4) {
3996 for (size_t k = 1; k <= 40; k += 9) {
3997 GemmMicrokernelTester()
3998 .mr(2)
3999 .nr(4)
4000 .kr(2)
4001 .sr(1)
4002 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004003 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004004 .k(k)
4005 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004006 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004007 }
4008 }
4009 }
4010
4011 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, strided_cm_subtile) {
4012 TEST_REQUIRES_X86_SSE41;
4013 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004014 for (uint32_t n = 1; n <= 4; n++) {
4015 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004016 GemmMicrokernelTester()
4017 .mr(2)
4018 .nr(4)
4019 .kr(2)
4020 .sr(1)
4021 .m(m)
4022 .n(n)
4023 .k(k)
4024 .cm_stride(7)
4025 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004026 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004027 }
4028 }
4029 }
4030 }
4031
4032 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, a_offset) {
4033 TEST_REQUIRES_X86_SSE41;
4034 for (size_t k = 1; k <= 40; k += 9) {
4035 GemmMicrokernelTester()
4036 .mr(2)
4037 .nr(4)
4038 .kr(2)
4039 .sr(1)
4040 .m(2)
4041 .n(4)
4042 .k(k)
4043 .ks(3)
4044 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08004045 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004046 }
4047 }
4048
4049 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, zero) {
4050 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004051 for (size_t k = 1; k <= 40; k += 9) {
4052 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004053 GemmMicrokernelTester()
4054 .mr(2)
4055 .nr(4)
4056 .kr(2)
4057 .sr(1)
4058 .m(2)
4059 .n(4)
4060 .k(k)
4061 .ks(3)
4062 .a_offset(83)
4063 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08004064 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004065 }
4066 }
4067 }
4068
4069 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, qmin) {
4070 TEST_REQUIRES_X86_SSE41;
4071 GemmMicrokernelTester()
4072 .mr(2)
4073 .nr(4)
4074 .kr(2)
4075 .sr(1)
4076 .m(2)
4077 .n(4)
4078 .k(8)
4079 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004080 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004081 }
4082
4083 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, qmax) {
4084 TEST_REQUIRES_X86_SSE41;
4085 GemmMicrokernelTester()
4086 .mr(2)
4087 .nr(4)
4088 .kr(2)
4089 .sr(1)
4090 .m(2)
4091 .n(4)
4092 .k(8)
4093 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004094 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004095 }
4096
4097 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, strided_cm) {
4098 TEST_REQUIRES_X86_SSE41;
4099 GemmMicrokernelTester()
4100 .mr(2)
4101 .nr(4)
4102 .kr(2)
4103 .sr(1)
4104 .m(2)
4105 .n(4)
4106 .k(8)
4107 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08004108 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004109 }
4110
4111 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, no_a_zero_point) {
4112 TEST_REQUIRES_X86_SSE41;
4113 for (size_t k = 1; k <= 40; k += 9) {
4114 GemmMicrokernelTester()
4115 .mr(2)
4116 .nr(4)
4117 .kr(2)
4118 .sr(1)
4119 .m(2)
4120 .n(4)
4121 .k(k)
4122 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004123 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004124 }
4125 }
4126
4127 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, no_b_zero_point) {
4128 TEST_REQUIRES_X86_SSE41;
4129 for (size_t k = 1; k <= 40; k += 9) {
4130 GemmMicrokernelTester()
4131 .mr(2)
4132 .nr(4)
4133 .kr(2)
4134 .sr(1)
4135 .m(2)
4136 .n(4)
4137 .k(k)
4138 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004139 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004140 }
4141 }
4142
4143 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, no_zero_point) {
4144 TEST_REQUIRES_X86_SSE41;
4145 for (size_t k = 1; k <= 40; k += 9) {
4146 GemmMicrokernelTester()
4147 .mr(2)
4148 .nr(4)
4149 .kr(2)
4150 .sr(1)
4151 .m(2)
4152 .n(4)
4153 .k(k)
4154 .a_zero_point(0)
4155 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004156 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004157 }
4158 }
4159#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4160
4161
4162#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004163 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8) {
4164 TEST_REQUIRES_X86_SSE41;
4165 GemmMicrokernelTester()
4166 .mr(4)
4167 .nr(4)
4168 .kr(2)
4169 .sr(1)
4170 .m(4)
4171 .n(4)
4172 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08004173 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004174 }
4175
4176 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, strided_cn) {
4177 TEST_REQUIRES_X86_SSE41;
4178 GemmMicrokernelTester()
4179 .mr(4)
4180 .nr(4)
4181 .kr(2)
4182 .sr(1)
4183 .m(4)
4184 .n(4)
4185 .k(8)
4186 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08004187 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004188 }
4189
4190 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_subtile) {
4191 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004192 for (uint32_t n = 1; n <= 4; n++) {
4193 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004194 GemmMicrokernelTester()
4195 .mr(4)
4196 .nr(4)
4197 .kr(2)
4198 .sr(1)
4199 .m(m)
4200 .n(n)
4201 .k(8)
4202 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004203 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004204 }
4205 }
4206 }
4207
4208 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_subtile_m) {
4209 TEST_REQUIRES_X86_SSE41;
4210 for (uint32_t m = 1; m <= 4; m++) {
4211 GemmMicrokernelTester()
4212 .mr(4)
4213 .nr(4)
4214 .kr(2)
4215 .sr(1)
4216 .m(m)
4217 .n(4)
4218 .k(8)
4219 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004220 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004221 }
4222 }
4223
4224 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_subtile_n) {
4225 TEST_REQUIRES_X86_SSE41;
4226 for (uint32_t n = 1; n <= 4; n++) {
4227 GemmMicrokernelTester()
4228 .mr(4)
4229 .nr(4)
4230 .kr(2)
4231 .sr(1)
4232 .m(4)
4233 .n(n)
4234 .k(8)
4235 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004236 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004237 }
4238 }
4239
4240 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_lt_8) {
4241 TEST_REQUIRES_X86_SSE41;
4242 for (size_t k = 1; k < 8; k++) {
4243 GemmMicrokernelTester()
4244 .mr(4)
4245 .nr(4)
4246 .kr(2)
4247 .sr(1)
4248 .m(4)
4249 .n(4)
4250 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004251 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004252 }
4253 }
4254
4255 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_lt_8_subtile) {
4256 TEST_REQUIRES_X86_SSE41;
4257 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004258 for (uint32_t n = 1; n <= 4; n++) {
4259 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004260 GemmMicrokernelTester()
4261 .mr(4)
4262 .nr(4)
4263 .kr(2)
4264 .sr(1)
4265 .m(m)
4266 .n(n)
4267 .k(k)
4268 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004269 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004270 }
4271 }
4272 }
4273 }
4274
4275 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_gt_8) {
4276 TEST_REQUIRES_X86_SSE41;
4277 for (size_t k = 9; k < 16; k++) {
4278 GemmMicrokernelTester()
4279 .mr(4)
4280 .nr(4)
4281 .kr(2)
4282 .sr(1)
4283 .m(4)
4284 .n(4)
4285 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004286 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004287 }
4288 }
4289
4290 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_gt_8_subtile) {
4291 TEST_REQUIRES_X86_SSE41;
4292 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004293 for (uint32_t n = 1; n <= 4; n++) {
4294 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004295 GemmMicrokernelTester()
4296 .mr(4)
4297 .nr(4)
4298 .kr(2)
4299 .sr(1)
4300 .m(m)
4301 .n(n)
4302 .k(k)
4303 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004304 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004305 }
4306 }
4307 }
4308 }
4309
4310 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_div_8) {
4311 TEST_REQUIRES_X86_SSE41;
4312 for (size_t k = 16; k <= 80; k += 8) {
4313 GemmMicrokernelTester()
4314 .mr(4)
4315 .nr(4)
4316 .kr(2)
4317 .sr(1)
4318 .m(4)
4319 .n(4)
4320 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004321 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004322 }
4323 }
4324
4325 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_div_8_subtile) {
4326 TEST_REQUIRES_X86_SSE41;
4327 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004328 for (uint32_t n = 1; n <= 4; n++) {
4329 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004330 GemmMicrokernelTester()
4331 .mr(4)
4332 .nr(4)
4333 .kr(2)
4334 .sr(1)
4335 .m(m)
4336 .n(n)
4337 .k(k)
4338 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004339 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004340 }
4341 }
4342 }
4343 }
4344
4345 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4) {
4346 TEST_REQUIRES_X86_SSE41;
4347 for (uint32_t n = 5; n < 8; n++) {
4348 for (size_t k = 1; k <= 40; k += 9) {
4349 GemmMicrokernelTester()
4350 .mr(4)
4351 .nr(4)
4352 .kr(2)
4353 .sr(1)
4354 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004355 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004356 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004357 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004358 }
4359 }
4360 }
4361
4362 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4_strided_cn) {
4363 TEST_REQUIRES_X86_SSE41;
4364 for (uint32_t n = 5; n < 8; n++) {
4365 for (size_t k = 1; k <= 40; k += 9) {
4366 GemmMicrokernelTester()
4367 .mr(4)
4368 .nr(4)
4369 .kr(2)
4370 .sr(1)
4371 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004372 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004373 .k(k)
4374 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08004375 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004376 }
4377 }
4378 }
4379
4380 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4_subtile) {
4381 TEST_REQUIRES_X86_SSE41;
4382 for (uint32_t n = 5; n < 8; n++) {
4383 for (size_t k = 1; k <= 40; k += 9) {
4384 for (uint32_t m = 1; m <= 4; m++) {
4385 GemmMicrokernelTester()
4386 .mr(4)
4387 .nr(4)
4388 .kr(2)
4389 .sr(1)
4390 .m(m)
4391 .n(n)
4392 .k(k)
4393 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004394 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004395 }
4396 }
4397 }
4398 }
4399
4400 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4) {
4401 TEST_REQUIRES_X86_SSE41;
4402 for (uint32_t n = 8; n <= 12; n += 4) {
4403 for (size_t k = 1; k <= 40; k += 9) {
4404 GemmMicrokernelTester()
4405 .mr(4)
4406 .nr(4)
4407 .kr(2)
4408 .sr(1)
4409 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004410 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004411 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004412 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004413 }
4414 }
4415 }
4416
4417 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4_strided_cn) {
4418 TEST_REQUIRES_X86_SSE41;
4419 for (uint32_t n = 8; n <= 12; n += 4) {
4420 for (size_t k = 1; k <= 40; k += 9) {
4421 GemmMicrokernelTester()
4422 .mr(4)
4423 .nr(4)
4424 .kr(2)
4425 .sr(1)
4426 .m(4)
4427 .n(n)
4428 .k(k)
4429 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08004430 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004431 }
4432 }
4433 }
4434
4435 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4_subtile) {
4436 TEST_REQUIRES_X86_SSE41;
4437 for (uint32_t n = 8; n <= 12; n += 4) {
4438 for (size_t k = 1; k <= 40; k += 9) {
4439 for (uint32_t m = 1; m <= 4; m++) {
4440 GemmMicrokernelTester()
4441 .mr(4)
4442 .nr(4)
4443 .kr(2)
4444 .sr(1)
4445 .m(m)
4446 .n(n)
4447 .k(k)
4448 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004449 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004450 }
4451 }
4452 }
4453 }
4454
4455 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, small_kernel) {
4456 TEST_REQUIRES_X86_SSE41;
4457 for (size_t k = 1; k <= 40; k += 9) {
4458 GemmMicrokernelTester()
4459 .mr(4)
4460 .nr(4)
4461 .kr(2)
4462 .sr(1)
4463 .m(4)
4464 .n(4)
4465 .k(k)
4466 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004467 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004468 }
4469 }
4470
4471 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, small_kernel_subtile) {
4472 TEST_REQUIRES_X86_SSE41;
4473 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004474 for (uint32_t n = 1; n <= 4; n++) {
4475 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004476 GemmMicrokernelTester()
4477 .mr(4)
4478 .nr(4)
4479 .kr(2)
4480 .sr(1)
4481 .m(m)
4482 .n(n)
4483 .k(k)
4484 .ks(3)
4485 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004486 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004487 }
4488 }
4489 }
4490 }
4491
4492 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4_small_kernel) {
4493 TEST_REQUIRES_X86_SSE41;
4494 for (uint32_t n = 5; n < 8; n++) {
4495 for (size_t k = 1; k <= 40; k += 9) {
4496 GemmMicrokernelTester()
4497 .mr(4)
4498 .nr(4)
4499 .kr(2)
4500 .sr(1)
4501 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004502 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004503 .k(k)
4504 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004505 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004506 }
4507 }
4508 }
4509
4510 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4_small_kernel) {
4511 TEST_REQUIRES_X86_SSE41;
4512 for (uint32_t n = 8; n <= 12; n += 4) {
4513 for (size_t k = 1; k <= 40; k += 9) {
4514 GemmMicrokernelTester()
4515 .mr(4)
4516 .nr(4)
4517 .kr(2)
4518 .sr(1)
4519 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004520 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004521 .k(k)
4522 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004523 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004524 }
4525 }
4526 }
4527
4528 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, strided_cm_subtile) {
4529 TEST_REQUIRES_X86_SSE41;
4530 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004531 for (uint32_t n = 1; n <= 4; n++) {
4532 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004533 GemmMicrokernelTester()
4534 .mr(4)
4535 .nr(4)
4536 .kr(2)
4537 .sr(1)
4538 .m(m)
4539 .n(n)
4540 .k(k)
4541 .cm_stride(7)
4542 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004543 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004544 }
4545 }
4546 }
4547 }
4548
4549 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, a_offset) {
4550 TEST_REQUIRES_X86_SSE41;
4551 for (size_t k = 1; k <= 40; k += 9) {
4552 GemmMicrokernelTester()
4553 .mr(4)
4554 .nr(4)
4555 .kr(2)
4556 .sr(1)
4557 .m(4)
4558 .n(4)
4559 .k(k)
4560 .ks(3)
4561 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08004562 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004563 }
4564 }
4565
4566 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, zero) {
4567 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004568 for (size_t k = 1; k <= 40; k += 9) {
4569 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004570 GemmMicrokernelTester()
4571 .mr(4)
4572 .nr(4)
4573 .kr(2)
4574 .sr(1)
4575 .m(4)
4576 .n(4)
4577 .k(k)
4578 .ks(3)
4579 .a_offset(163)
4580 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08004581 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004582 }
4583 }
4584 }
4585
4586 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, qmin) {
4587 TEST_REQUIRES_X86_SSE41;
4588 GemmMicrokernelTester()
4589 .mr(4)
4590 .nr(4)
4591 .kr(2)
4592 .sr(1)
4593 .m(4)
4594 .n(4)
4595 .k(8)
4596 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004597 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004598 }
4599
4600 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, qmax) {
4601 TEST_REQUIRES_X86_SSE41;
4602 GemmMicrokernelTester()
4603 .mr(4)
4604 .nr(4)
4605 .kr(2)
4606 .sr(1)
4607 .m(4)
4608 .n(4)
4609 .k(8)
4610 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004611 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004612 }
4613
4614 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, strided_cm) {
4615 TEST_REQUIRES_X86_SSE41;
4616 GemmMicrokernelTester()
4617 .mr(4)
4618 .nr(4)
4619 .kr(2)
4620 .sr(1)
4621 .m(4)
4622 .n(4)
4623 .k(8)
4624 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08004625 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004626 }
4627
4628 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, no_a_zero_point) {
4629 TEST_REQUIRES_X86_SSE41;
4630 for (size_t k = 1; k <= 40; k += 9) {
4631 GemmMicrokernelTester()
4632 .mr(4)
4633 .nr(4)
4634 .kr(2)
4635 .sr(1)
4636 .m(4)
4637 .n(4)
4638 .k(k)
4639 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004640 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004641 }
4642 }
4643
4644 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, no_b_zero_point) {
4645 TEST_REQUIRES_X86_SSE41;
4646 for (size_t k = 1; k <= 40; k += 9) {
4647 GemmMicrokernelTester()
4648 .mr(4)
4649 .nr(4)
4650 .kr(2)
4651 .sr(1)
4652 .m(4)
4653 .n(4)
4654 .k(k)
4655 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004656 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004657 }
4658 }
4659
4660 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, no_zero_point) {
4661 TEST_REQUIRES_X86_SSE41;
4662 for (size_t k = 1; k <= 40; k += 9) {
4663 GemmMicrokernelTester()
4664 .mr(4)
4665 .nr(4)
4666 .kr(2)
4667 .sr(1)
4668 .m(4)
4669 .n(4)
4670 .k(k)
4671 .a_zero_point(0)
4672 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004673 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004674 }
4675 }
4676#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4677
4678
4679#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004680 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8) {
4681 TEST_REQUIRES_X86_AVX;
4682 GemmMicrokernelTester()
4683 .mr(2)
4684 .nr(4)
4685 .kr(2)
4686 .sr(1)
4687 .m(2)
4688 .n(4)
4689 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08004690 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004691 }
4692
4693 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cn) {
4694 TEST_REQUIRES_X86_AVX;
4695 GemmMicrokernelTester()
4696 .mr(2)
4697 .nr(4)
4698 .kr(2)
4699 .sr(1)
4700 .m(2)
4701 .n(4)
4702 .k(8)
4703 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08004704 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004705 }
4706
4707 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile) {
4708 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004709 for (uint32_t n = 1; n <= 4; n++) {
4710 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004711 GemmMicrokernelTester()
4712 .mr(2)
4713 .nr(4)
4714 .kr(2)
4715 .sr(1)
4716 .m(m)
4717 .n(n)
4718 .k(8)
4719 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004720 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004721 }
4722 }
4723 }
4724
4725 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile_m) {
4726 TEST_REQUIRES_X86_AVX;
4727 for (uint32_t m = 1; m <= 2; m++) {
4728 GemmMicrokernelTester()
4729 .mr(2)
4730 .nr(4)
4731 .kr(2)
4732 .sr(1)
4733 .m(m)
4734 .n(4)
4735 .k(8)
4736 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004737 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004738 }
4739 }
4740
4741 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile_n) {
4742 TEST_REQUIRES_X86_AVX;
4743 for (uint32_t n = 1; n <= 4; n++) {
4744 GemmMicrokernelTester()
4745 .mr(2)
4746 .nr(4)
4747 .kr(2)
4748 .sr(1)
4749 .m(2)
4750 .n(n)
4751 .k(8)
4752 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004753 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004754 }
4755 }
4756
4757 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8) {
4758 TEST_REQUIRES_X86_AVX;
4759 for (size_t k = 1; k < 8; k++) {
4760 GemmMicrokernelTester()
4761 .mr(2)
4762 .nr(4)
4763 .kr(2)
4764 .sr(1)
4765 .m(2)
4766 .n(4)
4767 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004768 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004769 }
4770 }
4771
4772 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8_subtile) {
4773 TEST_REQUIRES_X86_AVX;
4774 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004775 for (uint32_t n = 1; n <= 4; n++) {
4776 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004777 GemmMicrokernelTester()
4778 .mr(2)
4779 .nr(4)
4780 .kr(2)
4781 .sr(1)
4782 .m(m)
4783 .n(n)
4784 .k(k)
4785 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004786 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004787 }
4788 }
4789 }
4790 }
4791
4792 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8) {
4793 TEST_REQUIRES_X86_AVX;
4794 for (size_t k = 9; k < 16; k++) {
4795 GemmMicrokernelTester()
4796 .mr(2)
4797 .nr(4)
4798 .kr(2)
4799 .sr(1)
4800 .m(2)
4801 .n(4)
4802 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004803 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004804 }
4805 }
4806
4807 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8_subtile) {
4808 TEST_REQUIRES_X86_AVX;
4809 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004810 for (uint32_t n = 1; n <= 4; n++) {
4811 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004812 GemmMicrokernelTester()
4813 .mr(2)
4814 .nr(4)
4815 .kr(2)
4816 .sr(1)
4817 .m(m)
4818 .n(n)
4819 .k(k)
4820 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004821 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004822 }
4823 }
4824 }
4825 }
4826
4827 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8) {
4828 TEST_REQUIRES_X86_AVX;
4829 for (size_t k = 16; k <= 80; k += 8) {
4830 GemmMicrokernelTester()
4831 .mr(2)
4832 .nr(4)
4833 .kr(2)
4834 .sr(1)
4835 .m(2)
4836 .n(4)
4837 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004838 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004839 }
4840 }
4841
4842 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8_subtile) {
4843 TEST_REQUIRES_X86_AVX;
4844 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004845 for (uint32_t n = 1; n <= 4; n++) {
4846 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004847 GemmMicrokernelTester()
4848 .mr(2)
4849 .nr(4)
4850 .kr(2)
4851 .sr(1)
4852 .m(m)
4853 .n(n)
4854 .k(k)
4855 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004856 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004857 }
4858 }
4859 }
4860 }
4861
4862 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4) {
4863 TEST_REQUIRES_X86_AVX;
4864 for (uint32_t n = 5; n < 8; n++) {
4865 for (size_t k = 1; k <= 40; k += 9) {
4866 GemmMicrokernelTester()
4867 .mr(2)
4868 .nr(4)
4869 .kr(2)
4870 .sr(1)
4871 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004872 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004873 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004874 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004875 }
4876 }
4877 }
4878
4879 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_strided_cn) {
4880 TEST_REQUIRES_X86_AVX;
4881 for (uint32_t n = 5; n < 8; n++) {
4882 for (size_t k = 1; k <= 40; k += 9) {
4883 GemmMicrokernelTester()
4884 .mr(2)
4885 .nr(4)
4886 .kr(2)
4887 .sr(1)
4888 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004889 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004890 .k(k)
4891 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08004892 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004893 }
4894 }
4895 }
4896
4897 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_subtile) {
4898 TEST_REQUIRES_X86_AVX;
4899 for (uint32_t n = 5; n < 8; n++) {
4900 for (size_t k = 1; k <= 40; k += 9) {
4901 for (uint32_t m = 1; m <= 2; m++) {
4902 GemmMicrokernelTester()
4903 .mr(2)
4904 .nr(4)
4905 .kr(2)
4906 .sr(1)
4907 .m(m)
4908 .n(n)
4909 .k(k)
4910 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004911 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004912 }
4913 }
4914 }
4915 }
4916
4917 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4) {
4918 TEST_REQUIRES_X86_AVX;
4919 for (uint32_t n = 8; n <= 12; n += 4) {
4920 for (size_t k = 1; k <= 40; k += 9) {
4921 GemmMicrokernelTester()
4922 .mr(2)
4923 .nr(4)
4924 .kr(2)
4925 .sr(1)
4926 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004927 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004928 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004929 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004930 }
4931 }
4932 }
4933
4934 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_strided_cn) {
4935 TEST_REQUIRES_X86_AVX;
4936 for (uint32_t n = 8; n <= 12; n += 4) {
4937 for (size_t k = 1; k <= 40; k += 9) {
4938 GemmMicrokernelTester()
4939 .mr(2)
4940 .nr(4)
4941 .kr(2)
4942 .sr(1)
4943 .m(2)
4944 .n(n)
4945 .k(k)
4946 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08004947 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004948 }
4949 }
4950 }
4951
4952 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_subtile) {
4953 TEST_REQUIRES_X86_AVX;
4954 for (uint32_t n = 8; n <= 12; n += 4) {
4955 for (size_t k = 1; k <= 40; k += 9) {
4956 for (uint32_t m = 1; m <= 2; m++) {
4957 GemmMicrokernelTester()
4958 .mr(2)
4959 .nr(4)
4960 .kr(2)
4961 .sr(1)
4962 .m(m)
4963 .n(n)
4964 .k(k)
4965 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004966 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004967 }
4968 }
4969 }
4970 }
4971
4972 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, small_kernel) {
4973 TEST_REQUIRES_X86_AVX;
4974 for (size_t k = 1; k <= 40; k += 9) {
4975 GemmMicrokernelTester()
4976 .mr(2)
4977 .nr(4)
4978 .kr(2)
4979 .sr(1)
4980 .m(2)
4981 .n(4)
4982 .k(k)
4983 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004984 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004985 }
4986 }
4987
4988 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, small_kernel_subtile) {
4989 TEST_REQUIRES_X86_AVX;
4990 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004991 for (uint32_t n = 1; n <= 4; n++) {
4992 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07004993 GemmMicrokernelTester()
4994 .mr(2)
4995 .nr(4)
4996 .kr(2)
4997 .sr(1)
4998 .m(m)
4999 .n(n)
5000 .k(k)
5001 .ks(3)
5002 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005003 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005004 }
5005 }
5006 }
5007 }
5008
5009 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_small_kernel) {
5010 TEST_REQUIRES_X86_AVX;
5011 for (uint32_t n = 5; n < 8; n++) {
5012 for (size_t k = 1; k <= 40; k += 9) {
5013 GemmMicrokernelTester()
5014 .mr(2)
5015 .nr(4)
5016 .kr(2)
5017 .sr(1)
5018 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005019 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005020 .k(k)
5021 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005022 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005023 }
5024 }
5025 }
5026
5027 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_small_kernel) {
5028 TEST_REQUIRES_X86_AVX;
5029 for (uint32_t n = 8; n <= 12; n += 4) {
5030 for (size_t k = 1; k <= 40; k += 9) {
5031 GemmMicrokernelTester()
5032 .mr(2)
5033 .nr(4)
5034 .kr(2)
5035 .sr(1)
5036 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005037 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005038 .k(k)
5039 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005040 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005041 }
5042 }
5043 }
5044
5045 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cm_subtile) {
5046 TEST_REQUIRES_X86_AVX;
5047 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005048 for (uint32_t n = 1; n <= 4; n++) {
5049 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005050 GemmMicrokernelTester()
5051 .mr(2)
5052 .nr(4)
5053 .kr(2)
5054 .sr(1)
5055 .m(m)
5056 .n(n)
5057 .k(k)
5058 .cm_stride(7)
5059 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005060 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005061 }
5062 }
5063 }
5064 }
5065
5066 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, a_offset) {
5067 TEST_REQUIRES_X86_AVX;
5068 for (size_t k = 1; k <= 40; k += 9) {
5069 GemmMicrokernelTester()
5070 .mr(2)
5071 .nr(4)
5072 .kr(2)
5073 .sr(1)
5074 .m(2)
5075 .n(4)
5076 .k(k)
5077 .ks(3)
5078 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08005079 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005080 }
5081 }
5082
5083 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, zero) {
5084 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005085 for (size_t k = 1; k <= 40; k += 9) {
5086 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005087 GemmMicrokernelTester()
5088 .mr(2)
5089 .nr(4)
5090 .kr(2)
5091 .sr(1)
5092 .m(2)
5093 .n(4)
5094 .k(k)
5095 .ks(3)
5096 .a_offset(83)
5097 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08005098 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005099 }
5100 }
5101 }
5102
5103 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, qmin) {
5104 TEST_REQUIRES_X86_AVX;
5105 GemmMicrokernelTester()
5106 .mr(2)
5107 .nr(4)
5108 .kr(2)
5109 .sr(1)
5110 .m(2)
5111 .n(4)
5112 .k(8)
5113 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005114 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005115 }
5116
5117 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, qmax) {
5118 TEST_REQUIRES_X86_AVX;
5119 GemmMicrokernelTester()
5120 .mr(2)
5121 .nr(4)
5122 .kr(2)
5123 .sr(1)
5124 .m(2)
5125 .n(4)
5126 .k(8)
5127 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005128 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005129 }
5130
5131 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cm) {
5132 TEST_REQUIRES_X86_AVX;
5133 GemmMicrokernelTester()
5134 .mr(2)
5135 .nr(4)
5136 .kr(2)
5137 .sr(1)
5138 .m(2)
5139 .n(4)
5140 .k(8)
5141 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08005142 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005143 }
5144
5145 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, no_a_zero_point) {
5146 TEST_REQUIRES_X86_AVX;
5147 for (size_t k = 1; k <= 40; k += 9) {
5148 GemmMicrokernelTester()
5149 .mr(2)
5150 .nr(4)
5151 .kr(2)
5152 .sr(1)
5153 .m(2)
5154 .n(4)
5155 .k(k)
5156 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005157 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005158 }
5159 }
5160
5161 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, no_b_zero_point) {
5162 TEST_REQUIRES_X86_AVX;
5163 for (size_t k = 1; k <= 40; k += 9) {
5164 GemmMicrokernelTester()
5165 .mr(2)
5166 .nr(4)
5167 .kr(2)
5168 .sr(1)
5169 .m(2)
5170 .n(4)
5171 .k(k)
5172 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005173 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005174 }
5175 }
5176
5177 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, no_zero_point) {
5178 TEST_REQUIRES_X86_AVX;
5179 for (size_t k = 1; k <= 40; k += 9) {
5180 GemmMicrokernelTester()
5181 .mr(2)
5182 .nr(4)
5183 .kr(2)
5184 .sr(1)
5185 .m(2)
5186 .n(4)
5187 .k(k)
5188 .a_zero_point(0)
5189 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005190 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005191 }
5192 }
5193#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5194
5195
5196#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5197 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8) {
5198 TEST_REQUIRES_X86_AVX;
5199 GemmMicrokernelTester()
5200 .mr(3)
5201 .nr(4)
5202 .kr(2)
5203 .sr(1)
5204 .m(3)
5205 .n(4)
5206 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08005207 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005208 }
5209
5210 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cn) {
5211 TEST_REQUIRES_X86_AVX;
5212 GemmMicrokernelTester()
5213 .mr(3)
5214 .nr(4)
5215 .kr(2)
5216 .sr(1)
5217 .m(3)
5218 .n(4)
5219 .k(8)
5220 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08005221 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005222 }
5223
5224 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile) {
5225 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005226 for (uint32_t n = 1; n <= 4; n++) {
5227 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005228 GemmMicrokernelTester()
5229 .mr(3)
5230 .nr(4)
5231 .kr(2)
5232 .sr(1)
5233 .m(m)
5234 .n(n)
5235 .k(8)
5236 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005237 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005238 }
5239 }
5240 }
5241
5242 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile_m) {
5243 TEST_REQUIRES_X86_AVX;
5244 for (uint32_t m = 1; m <= 3; m++) {
5245 GemmMicrokernelTester()
5246 .mr(3)
5247 .nr(4)
5248 .kr(2)
5249 .sr(1)
5250 .m(m)
5251 .n(4)
5252 .k(8)
5253 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005254 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005255 }
5256 }
5257
5258 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile_n) {
5259 TEST_REQUIRES_X86_AVX;
5260 for (uint32_t n = 1; n <= 4; n++) {
5261 GemmMicrokernelTester()
5262 .mr(3)
5263 .nr(4)
5264 .kr(2)
5265 .sr(1)
5266 .m(3)
5267 .n(n)
5268 .k(8)
5269 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005270 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005271 }
5272 }
5273
5274 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8) {
5275 TEST_REQUIRES_X86_AVX;
5276 for (size_t k = 1; k < 8; k++) {
5277 GemmMicrokernelTester()
5278 .mr(3)
5279 .nr(4)
5280 .kr(2)
5281 .sr(1)
5282 .m(3)
5283 .n(4)
5284 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005285 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005286 }
5287 }
5288
5289 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8_subtile) {
5290 TEST_REQUIRES_X86_AVX;
5291 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005292 for (uint32_t n = 1; n <= 4; n++) {
5293 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005294 GemmMicrokernelTester()
5295 .mr(3)
5296 .nr(4)
5297 .kr(2)
5298 .sr(1)
5299 .m(m)
5300 .n(n)
5301 .k(k)
5302 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005303 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005304 }
5305 }
5306 }
5307 }
5308
5309 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8) {
5310 TEST_REQUIRES_X86_AVX;
5311 for (size_t k = 9; k < 16; k++) {
5312 GemmMicrokernelTester()
5313 .mr(3)
5314 .nr(4)
5315 .kr(2)
5316 .sr(1)
5317 .m(3)
5318 .n(4)
5319 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005320 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005321 }
5322 }
5323
5324 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8_subtile) {
5325 TEST_REQUIRES_X86_AVX;
5326 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005327 for (uint32_t n = 1; n <= 4; n++) {
5328 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005329 GemmMicrokernelTester()
5330 .mr(3)
5331 .nr(4)
5332 .kr(2)
5333 .sr(1)
5334 .m(m)
5335 .n(n)
5336 .k(k)
5337 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005338 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005339 }
5340 }
5341 }
5342 }
5343
5344 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8) {
5345 TEST_REQUIRES_X86_AVX;
5346 for (size_t k = 16; k <= 80; k += 8) {
5347 GemmMicrokernelTester()
5348 .mr(3)
5349 .nr(4)
5350 .kr(2)
5351 .sr(1)
5352 .m(3)
5353 .n(4)
5354 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005355 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005356 }
5357 }
5358
5359 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8_subtile) {
5360 TEST_REQUIRES_X86_AVX;
5361 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005362 for (uint32_t n = 1; n <= 4; n++) {
5363 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005364 GemmMicrokernelTester()
5365 .mr(3)
5366 .nr(4)
5367 .kr(2)
5368 .sr(1)
5369 .m(m)
5370 .n(n)
5371 .k(k)
5372 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005373 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005374 }
5375 }
5376 }
5377 }
5378
5379 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4) {
5380 TEST_REQUIRES_X86_AVX;
5381 for (uint32_t n = 5; n < 8; n++) {
5382 for (size_t k = 1; k <= 40; k += 9) {
5383 GemmMicrokernelTester()
5384 .mr(3)
5385 .nr(4)
5386 .kr(2)
5387 .sr(1)
5388 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005389 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005390 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005391 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005392 }
5393 }
5394 }
5395
5396 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_strided_cn) {
5397 TEST_REQUIRES_X86_AVX;
5398 for (uint32_t n = 5; n < 8; n++) {
5399 for (size_t k = 1; k <= 40; k += 9) {
5400 GemmMicrokernelTester()
5401 .mr(3)
5402 .nr(4)
5403 .kr(2)
5404 .sr(1)
5405 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005406 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005407 .k(k)
5408 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08005409 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005410 }
5411 }
5412 }
5413
5414 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_subtile) {
5415 TEST_REQUIRES_X86_AVX;
5416 for (uint32_t n = 5; n < 8; n++) {
5417 for (size_t k = 1; k <= 40; k += 9) {
5418 for (uint32_t m = 1; m <= 3; m++) {
5419 GemmMicrokernelTester()
5420 .mr(3)
5421 .nr(4)
5422 .kr(2)
5423 .sr(1)
5424 .m(m)
5425 .n(n)
5426 .k(k)
5427 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005428 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005429 }
5430 }
5431 }
5432 }
5433
5434 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4) {
5435 TEST_REQUIRES_X86_AVX;
5436 for (uint32_t n = 8; n <= 12; n += 4) {
5437 for (size_t k = 1; k <= 40; k += 9) {
5438 GemmMicrokernelTester()
5439 .mr(3)
5440 .nr(4)
5441 .kr(2)
5442 .sr(1)
5443 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005444 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005445 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005446 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005447 }
5448 }
5449 }
5450
5451 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_strided_cn) {
5452 TEST_REQUIRES_X86_AVX;
5453 for (uint32_t n = 8; n <= 12; n += 4) {
5454 for (size_t k = 1; k <= 40; k += 9) {
5455 GemmMicrokernelTester()
5456 .mr(3)
5457 .nr(4)
5458 .kr(2)
5459 .sr(1)
5460 .m(3)
5461 .n(n)
5462 .k(k)
5463 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08005464 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005465 }
5466 }
5467 }
5468
5469 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_subtile) {
5470 TEST_REQUIRES_X86_AVX;
5471 for (uint32_t n = 8; n <= 12; n += 4) {
5472 for (size_t k = 1; k <= 40; k += 9) {
5473 for (uint32_t m = 1; m <= 3; m++) {
5474 GemmMicrokernelTester()
5475 .mr(3)
5476 .nr(4)
5477 .kr(2)
5478 .sr(1)
5479 .m(m)
5480 .n(n)
5481 .k(k)
5482 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005483 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005484 }
5485 }
5486 }
5487 }
5488
5489 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, small_kernel) {
5490 TEST_REQUIRES_X86_AVX;
5491 for (size_t k = 1; k <= 40; k += 9) {
5492 GemmMicrokernelTester()
5493 .mr(3)
5494 .nr(4)
5495 .kr(2)
5496 .sr(1)
5497 .m(3)
5498 .n(4)
5499 .k(k)
5500 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005501 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005502 }
5503 }
5504
5505 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, small_kernel_subtile) {
5506 TEST_REQUIRES_X86_AVX;
5507 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005508 for (uint32_t n = 1; n <= 4; n++) {
5509 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005510 GemmMicrokernelTester()
5511 .mr(3)
5512 .nr(4)
5513 .kr(2)
5514 .sr(1)
5515 .m(m)
5516 .n(n)
5517 .k(k)
5518 .ks(3)
5519 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005520 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005521 }
5522 }
5523 }
5524 }
5525
5526 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_small_kernel) {
5527 TEST_REQUIRES_X86_AVX;
5528 for (uint32_t n = 5; n < 8; n++) {
5529 for (size_t k = 1; k <= 40; k += 9) {
5530 GemmMicrokernelTester()
5531 .mr(3)
5532 .nr(4)
5533 .kr(2)
5534 .sr(1)
5535 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005536 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005537 .k(k)
5538 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005539 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005540 }
5541 }
5542 }
5543
5544 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_small_kernel) {
5545 TEST_REQUIRES_X86_AVX;
5546 for (uint32_t n = 8; n <= 12; n += 4) {
5547 for (size_t k = 1; k <= 40; k += 9) {
5548 GemmMicrokernelTester()
5549 .mr(3)
5550 .nr(4)
5551 .kr(2)
5552 .sr(1)
5553 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005554 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005555 .k(k)
5556 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005557 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005558 }
5559 }
5560 }
5561
5562 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cm_subtile) {
5563 TEST_REQUIRES_X86_AVX;
5564 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005565 for (uint32_t n = 1; n <= 4; n++) {
5566 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005567 GemmMicrokernelTester()
5568 .mr(3)
5569 .nr(4)
5570 .kr(2)
5571 .sr(1)
5572 .m(m)
5573 .n(n)
5574 .k(k)
5575 .cm_stride(7)
5576 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005577 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005578 }
5579 }
5580 }
5581 }
5582
5583 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, a_offset) {
5584 TEST_REQUIRES_X86_AVX;
5585 for (size_t k = 1; k <= 40; k += 9) {
5586 GemmMicrokernelTester()
5587 .mr(3)
5588 .nr(4)
5589 .kr(2)
5590 .sr(1)
5591 .m(3)
5592 .n(4)
5593 .k(k)
5594 .ks(3)
5595 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08005596 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005597 }
5598 }
5599
5600 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, zero) {
5601 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005602 for (size_t k = 1; k <= 40; k += 9) {
5603 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005604 GemmMicrokernelTester()
5605 .mr(3)
5606 .nr(4)
5607 .kr(2)
5608 .sr(1)
5609 .m(3)
5610 .n(4)
5611 .k(k)
5612 .ks(3)
5613 .a_offset(127)
5614 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08005615 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005616 }
5617 }
5618 }
5619
5620 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, qmin) {
5621 TEST_REQUIRES_X86_AVX;
5622 GemmMicrokernelTester()
5623 .mr(3)
5624 .nr(4)
5625 .kr(2)
5626 .sr(1)
5627 .m(3)
5628 .n(4)
5629 .k(8)
5630 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005631 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005632 }
5633
5634 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, qmax) {
5635 TEST_REQUIRES_X86_AVX;
5636 GemmMicrokernelTester()
5637 .mr(3)
5638 .nr(4)
5639 .kr(2)
5640 .sr(1)
5641 .m(3)
5642 .n(4)
5643 .k(8)
5644 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005645 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005646 }
5647
5648 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cm) {
5649 TEST_REQUIRES_X86_AVX;
5650 GemmMicrokernelTester()
5651 .mr(3)
5652 .nr(4)
5653 .kr(2)
5654 .sr(1)
5655 .m(3)
5656 .n(4)
5657 .k(8)
5658 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08005659 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005660 }
5661
5662 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, no_a_zero_point) {
5663 TEST_REQUIRES_X86_AVX;
5664 for (size_t k = 1; k <= 40; k += 9) {
5665 GemmMicrokernelTester()
5666 .mr(3)
5667 .nr(4)
5668 .kr(2)
5669 .sr(1)
5670 .m(3)
5671 .n(4)
5672 .k(k)
5673 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005674 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005675 }
5676 }
5677
5678 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, no_b_zero_point) {
5679 TEST_REQUIRES_X86_AVX;
5680 for (size_t k = 1; k <= 40; k += 9) {
5681 GemmMicrokernelTester()
5682 .mr(3)
5683 .nr(4)
5684 .kr(2)
5685 .sr(1)
5686 .m(3)
5687 .n(4)
5688 .k(k)
5689 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005690 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005691 }
5692 }
5693
5694 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, no_zero_point) {
5695 TEST_REQUIRES_X86_AVX;
5696 for (size_t k = 1; k <= 40; k += 9) {
5697 GemmMicrokernelTester()
5698 .mr(3)
5699 .nr(4)
5700 .kr(2)
5701 .sr(1)
5702 .m(3)
5703 .n(4)
5704 .k(k)
5705 .a_zero_point(0)
5706 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005707 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005708 }
5709 }
5710#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5711
5712
5713#if XNN_ARCH_X86 || XNN_ARCH_X86_64
5714 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8) {
5715 TEST_REQUIRES_X86_AVX;
5716 GemmMicrokernelTester()
5717 .mr(4)
5718 .nr(4)
5719 .kr(2)
5720 .sr(1)
5721 .m(4)
5722 .n(4)
5723 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08005724 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005725 }
5726
5727 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cn) {
5728 TEST_REQUIRES_X86_AVX;
5729 GemmMicrokernelTester()
5730 .mr(4)
5731 .nr(4)
5732 .kr(2)
5733 .sr(1)
5734 .m(4)
5735 .n(4)
5736 .k(8)
5737 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08005738 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005739 }
5740
5741 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile) {
5742 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005743 for (uint32_t n = 1; n <= 4; n++) {
5744 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005745 GemmMicrokernelTester()
5746 .mr(4)
5747 .nr(4)
5748 .kr(2)
5749 .sr(1)
5750 .m(m)
5751 .n(n)
5752 .k(8)
5753 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005754 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005755 }
5756 }
5757 }
5758
5759 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile_m) {
5760 TEST_REQUIRES_X86_AVX;
5761 for (uint32_t m = 1; m <= 4; m++) {
5762 GemmMicrokernelTester()
5763 .mr(4)
5764 .nr(4)
5765 .kr(2)
5766 .sr(1)
5767 .m(m)
5768 .n(4)
5769 .k(8)
5770 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005771 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005772 }
5773 }
5774
5775 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile_n) {
5776 TEST_REQUIRES_X86_AVX;
5777 for (uint32_t n = 1; n <= 4; n++) {
5778 GemmMicrokernelTester()
5779 .mr(4)
5780 .nr(4)
5781 .kr(2)
5782 .sr(1)
5783 .m(4)
5784 .n(n)
5785 .k(8)
5786 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005787 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005788 }
5789 }
5790
5791 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8) {
5792 TEST_REQUIRES_X86_AVX;
5793 for (size_t k = 1; k < 8; k++) {
5794 GemmMicrokernelTester()
5795 .mr(4)
5796 .nr(4)
5797 .kr(2)
5798 .sr(1)
5799 .m(4)
5800 .n(4)
5801 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005802 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005803 }
5804 }
5805
5806 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8_subtile) {
5807 TEST_REQUIRES_X86_AVX;
5808 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005809 for (uint32_t n = 1; n <= 4; n++) {
5810 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005811 GemmMicrokernelTester()
5812 .mr(4)
5813 .nr(4)
5814 .kr(2)
5815 .sr(1)
5816 .m(m)
5817 .n(n)
5818 .k(k)
5819 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005820 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005821 }
5822 }
5823 }
5824 }
5825
5826 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8) {
5827 TEST_REQUIRES_X86_AVX;
5828 for (size_t k = 9; k < 16; k++) {
5829 GemmMicrokernelTester()
5830 .mr(4)
5831 .nr(4)
5832 .kr(2)
5833 .sr(1)
5834 .m(4)
5835 .n(4)
5836 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005837 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005838 }
5839 }
5840
5841 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8_subtile) {
5842 TEST_REQUIRES_X86_AVX;
5843 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005844 for (uint32_t n = 1; n <= 4; n++) {
5845 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005846 GemmMicrokernelTester()
5847 .mr(4)
5848 .nr(4)
5849 .kr(2)
5850 .sr(1)
5851 .m(m)
5852 .n(n)
5853 .k(k)
5854 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005855 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005856 }
5857 }
5858 }
5859 }
5860
5861 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8) {
5862 TEST_REQUIRES_X86_AVX;
5863 for (size_t k = 16; k <= 80; k += 8) {
5864 GemmMicrokernelTester()
5865 .mr(4)
5866 .nr(4)
5867 .kr(2)
5868 .sr(1)
5869 .m(4)
5870 .n(4)
5871 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005872 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005873 }
5874 }
5875
5876 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8_subtile) {
5877 TEST_REQUIRES_X86_AVX;
5878 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005879 for (uint32_t n = 1; n <= 4; n++) {
5880 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005881 GemmMicrokernelTester()
5882 .mr(4)
5883 .nr(4)
5884 .kr(2)
5885 .sr(1)
5886 .m(m)
5887 .n(n)
5888 .k(k)
5889 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005890 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005891 }
5892 }
5893 }
5894 }
5895
5896 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4) {
5897 TEST_REQUIRES_X86_AVX;
5898 for (uint32_t n = 5; n < 8; n++) {
5899 for (size_t k = 1; k <= 40; k += 9) {
5900 GemmMicrokernelTester()
5901 .mr(4)
5902 .nr(4)
5903 .kr(2)
5904 .sr(1)
5905 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005906 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005907 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005908 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005909 }
5910 }
5911 }
5912
5913 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_strided_cn) {
5914 TEST_REQUIRES_X86_AVX;
5915 for (uint32_t n = 5; n < 8; n++) {
5916 for (size_t k = 1; k <= 40; k += 9) {
5917 GemmMicrokernelTester()
5918 .mr(4)
5919 .nr(4)
5920 .kr(2)
5921 .sr(1)
5922 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005923 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005924 .k(k)
5925 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08005926 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005927 }
5928 }
5929 }
5930
5931 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_subtile) {
5932 TEST_REQUIRES_X86_AVX;
5933 for (uint32_t n = 5; n < 8; n++) {
5934 for (size_t k = 1; k <= 40; k += 9) {
5935 for (uint32_t m = 1; m <= 4; m++) {
5936 GemmMicrokernelTester()
5937 .mr(4)
5938 .nr(4)
5939 .kr(2)
5940 .sr(1)
5941 .m(m)
5942 .n(n)
5943 .k(k)
5944 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005945 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005946 }
5947 }
5948 }
5949 }
5950
5951 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4) {
5952 TEST_REQUIRES_X86_AVX;
5953 for (uint32_t n = 8; n <= 12; n += 4) {
5954 for (size_t k = 1; k <= 40; k += 9) {
5955 GemmMicrokernelTester()
5956 .mr(4)
5957 .nr(4)
5958 .kr(2)
5959 .sr(1)
5960 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005961 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005962 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005963 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005964 }
5965 }
5966 }
5967
5968 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_strided_cn) {
5969 TEST_REQUIRES_X86_AVX;
5970 for (uint32_t n = 8; n <= 12; n += 4) {
5971 for (size_t k = 1; k <= 40; k += 9) {
5972 GemmMicrokernelTester()
5973 .mr(4)
5974 .nr(4)
5975 .kr(2)
5976 .sr(1)
5977 .m(4)
5978 .n(n)
5979 .k(k)
5980 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08005981 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07005982 }
5983 }
5984 }
5985
5986 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_subtile) {
5987 TEST_REQUIRES_X86_AVX;
5988 for (uint32_t n = 8; n <= 12; n += 4) {
5989 for (size_t k = 1; k <= 40; k += 9) {
5990 for (uint32_t m = 1; m <= 4; m++) {
5991 GemmMicrokernelTester()
5992 .mr(4)
5993 .nr(4)
5994 .kr(2)
5995 .sr(1)
5996 .m(m)
5997 .n(n)
5998 .k(k)
5999 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006000 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006001 }
6002 }
6003 }
6004 }
6005
6006 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, small_kernel) {
6007 TEST_REQUIRES_X86_AVX;
6008 for (size_t k = 1; k <= 40; k += 9) {
6009 GemmMicrokernelTester()
6010 .mr(4)
6011 .nr(4)
6012 .kr(2)
6013 .sr(1)
6014 .m(4)
6015 .n(4)
6016 .k(k)
6017 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006018 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006019 }
6020 }
6021
6022 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, small_kernel_subtile) {
6023 TEST_REQUIRES_X86_AVX;
6024 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006025 for (uint32_t n = 1; n <= 4; n++) {
6026 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006027 GemmMicrokernelTester()
6028 .mr(4)
6029 .nr(4)
6030 .kr(2)
6031 .sr(1)
6032 .m(m)
6033 .n(n)
6034 .k(k)
6035 .ks(3)
6036 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006037 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006038 }
6039 }
6040 }
6041 }
6042
6043 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_small_kernel) {
6044 TEST_REQUIRES_X86_AVX;
6045 for (uint32_t n = 5; n < 8; n++) {
6046 for (size_t k = 1; k <= 40; k += 9) {
6047 GemmMicrokernelTester()
6048 .mr(4)
6049 .nr(4)
6050 .kr(2)
6051 .sr(1)
6052 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006053 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006054 .k(k)
6055 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006056 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006057 }
6058 }
6059 }
6060
6061 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_small_kernel) {
6062 TEST_REQUIRES_X86_AVX;
6063 for (uint32_t n = 8; n <= 12; n += 4) {
6064 for (size_t k = 1; k <= 40; k += 9) {
6065 GemmMicrokernelTester()
6066 .mr(4)
6067 .nr(4)
6068 .kr(2)
6069 .sr(1)
6070 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006071 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006072 .k(k)
6073 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006074 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006075 }
6076 }
6077 }
6078
6079 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cm_subtile) {
6080 TEST_REQUIRES_X86_AVX;
6081 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006082 for (uint32_t n = 1; n <= 4; n++) {
6083 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006084 GemmMicrokernelTester()
6085 .mr(4)
6086 .nr(4)
6087 .kr(2)
6088 .sr(1)
6089 .m(m)
6090 .n(n)
6091 .k(k)
6092 .cm_stride(7)
6093 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006094 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006095 }
6096 }
6097 }
6098 }
6099
6100 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, a_offset) {
6101 TEST_REQUIRES_X86_AVX;
6102 for (size_t k = 1; k <= 40; k += 9) {
6103 GemmMicrokernelTester()
6104 .mr(4)
6105 .nr(4)
6106 .kr(2)
6107 .sr(1)
6108 .m(4)
6109 .n(4)
6110 .k(k)
6111 .ks(3)
6112 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08006113 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006114 }
6115 }
6116
6117 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, zero) {
6118 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006119 for (size_t k = 1; k <= 40; k += 9) {
6120 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006121 GemmMicrokernelTester()
6122 .mr(4)
6123 .nr(4)
6124 .kr(2)
6125 .sr(1)
6126 .m(4)
6127 .n(4)
6128 .k(k)
6129 .ks(3)
6130 .a_offset(163)
6131 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08006132 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006133 }
6134 }
6135 }
6136
6137 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, qmin) {
6138 TEST_REQUIRES_X86_AVX;
6139 GemmMicrokernelTester()
6140 .mr(4)
6141 .nr(4)
6142 .kr(2)
6143 .sr(1)
6144 .m(4)
6145 .n(4)
6146 .k(8)
6147 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006148 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006149 }
6150
6151 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, qmax) {
6152 TEST_REQUIRES_X86_AVX;
6153 GemmMicrokernelTester()
6154 .mr(4)
6155 .nr(4)
6156 .kr(2)
6157 .sr(1)
6158 .m(4)
6159 .n(4)
6160 .k(8)
6161 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006162 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006163 }
6164
6165 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cm) {
6166 TEST_REQUIRES_X86_AVX;
6167 GemmMicrokernelTester()
6168 .mr(4)
6169 .nr(4)
6170 .kr(2)
6171 .sr(1)
6172 .m(4)
6173 .n(4)
6174 .k(8)
6175 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08006176 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006177 }
6178
6179 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, no_a_zero_point) {
6180 TEST_REQUIRES_X86_AVX;
6181 for (size_t k = 1; k <= 40; k += 9) {
6182 GemmMicrokernelTester()
6183 .mr(4)
6184 .nr(4)
6185 .kr(2)
6186 .sr(1)
6187 .m(4)
6188 .n(4)
6189 .k(k)
6190 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006191 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006192 }
6193 }
6194
6195 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, no_b_zero_point) {
6196 TEST_REQUIRES_X86_AVX;
6197 for (size_t k = 1; k <= 40; k += 9) {
6198 GemmMicrokernelTester()
6199 .mr(4)
6200 .nr(4)
6201 .kr(2)
6202 .sr(1)
6203 .m(4)
6204 .n(4)
6205 .k(k)
6206 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006207 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006208 }
6209 }
6210
6211 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, no_zero_point) {
6212 TEST_REQUIRES_X86_AVX;
6213 for (size_t k = 1; k <= 40; k += 9) {
6214 GemmMicrokernelTester()
6215 .mr(4)
6216 .nr(4)
6217 .kr(2)
6218 .sr(1)
6219 .m(4)
6220 .n(4)
6221 .k(k)
6222 .a_zero_point(0)
6223 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006224 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006225 }
6226 }
6227#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6228
6229
6230#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006231 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8) {
6232 TEST_REQUIRES_X86_XOP;
6233 GemmMicrokernelTester()
6234 .mr(2)
6235 .nr(4)
6236 .kr(2)
6237 .sr(1)
6238 .m(2)
6239 .n(4)
6240 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08006241 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006242 }
6243
6244 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cn) {
6245 TEST_REQUIRES_X86_XOP;
6246 GemmMicrokernelTester()
6247 .mr(2)
6248 .nr(4)
6249 .kr(2)
6250 .sr(1)
6251 .m(2)
6252 .n(4)
6253 .k(8)
6254 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08006255 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006256 }
6257
6258 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile) {
6259 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006260 for (uint32_t n = 1; n <= 4; n++) {
6261 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006262 GemmMicrokernelTester()
6263 .mr(2)
6264 .nr(4)
6265 .kr(2)
6266 .sr(1)
6267 .m(m)
6268 .n(n)
6269 .k(8)
6270 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006271 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006272 }
6273 }
6274 }
6275
6276 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile_m) {
6277 TEST_REQUIRES_X86_XOP;
6278 for (uint32_t m = 1; m <= 2; m++) {
6279 GemmMicrokernelTester()
6280 .mr(2)
6281 .nr(4)
6282 .kr(2)
6283 .sr(1)
6284 .m(m)
6285 .n(4)
6286 .k(8)
6287 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006288 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006289 }
6290 }
6291
6292 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile_n) {
6293 TEST_REQUIRES_X86_XOP;
6294 for (uint32_t n = 1; n <= 4; n++) {
6295 GemmMicrokernelTester()
6296 .mr(2)
6297 .nr(4)
6298 .kr(2)
6299 .sr(1)
6300 .m(2)
6301 .n(n)
6302 .k(8)
6303 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006304 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006305 }
6306 }
6307
6308 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8) {
6309 TEST_REQUIRES_X86_XOP;
6310 for (size_t k = 1; k < 8; k++) {
6311 GemmMicrokernelTester()
6312 .mr(2)
6313 .nr(4)
6314 .kr(2)
6315 .sr(1)
6316 .m(2)
6317 .n(4)
6318 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006319 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006320 }
6321 }
6322
6323 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8_subtile) {
6324 TEST_REQUIRES_X86_XOP;
6325 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006326 for (uint32_t n = 1; n <= 4; n++) {
6327 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006328 GemmMicrokernelTester()
6329 .mr(2)
6330 .nr(4)
6331 .kr(2)
6332 .sr(1)
6333 .m(m)
6334 .n(n)
6335 .k(k)
6336 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006337 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006338 }
6339 }
6340 }
6341 }
6342
6343 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8) {
6344 TEST_REQUIRES_X86_XOP;
6345 for (size_t k = 9; k < 16; k++) {
6346 GemmMicrokernelTester()
6347 .mr(2)
6348 .nr(4)
6349 .kr(2)
6350 .sr(1)
6351 .m(2)
6352 .n(4)
6353 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006354 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006355 }
6356 }
6357
6358 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8_subtile) {
6359 TEST_REQUIRES_X86_XOP;
6360 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006361 for (uint32_t n = 1; n <= 4; n++) {
6362 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006363 GemmMicrokernelTester()
6364 .mr(2)
6365 .nr(4)
6366 .kr(2)
6367 .sr(1)
6368 .m(m)
6369 .n(n)
6370 .k(k)
6371 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006372 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006373 }
6374 }
6375 }
6376 }
6377
6378 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8) {
6379 TEST_REQUIRES_X86_XOP;
6380 for (size_t k = 16; k <= 80; k += 8) {
6381 GemmMicrokernelTester()
6382 .mr(2)
6383 .nr(4)
6384 .kr(2)
6385 .sr(1)
6386 .m(2)
6387 .n(4)
6388 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006389 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006390 }
6391 }
6392
6393 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8_subtile) {
6394 TEST_REQUIRES_X86_XOP;
6395 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006396 for (uint32_t n = 1; n <= 4; n++) {
6397 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006398 GemmMicrokernelTester()
6399 .mr(2)
6400 .nr(4)
6401 .kr(2)
6402 .sr(1)
6403 .m(m)
6404 .n(n)
6405 .k(k)
6406 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006407 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006408 }
6409 }
6410 }
6411 }
6412
6413 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4) {
6414 TEST_REQUIRES_X86_XOP;
6415 for (uint32_t n = 5; n < 8; n++) {
6416 for (size_t k = 1; k <= 40; k += 9) {
6417 GemmMicrokernelTester()
6418 .mr(2)
6419 .nr(4)
6420 .kr(2)
6421 .sr(1)
6422 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006423 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006424 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006425 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006426 }
6427 }
6428 }
6429
6430 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_strided_cn) {
6431 TEST_REQUIRES_X86_XOP;
6432 for (uint32_t n = 5; n < 8; n++) {
6433 for (size_t k = 1; k <= 40; k += 9) {
6434 GemmMicrokernelTester()
6435 .mr(2)
6436 .nr(4)
6437 .kr(2)
6438 .sr(1)
6439 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006440 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006441 .k(k)
6442 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08006443 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006444 }
6445 }
6446 }
6447
6448 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_subtile) {
6449 TEST_REQUIRES_X86_XOP;
6450 for (uint32_t n = 5; n < 8; n++) {
6451 for (size_t k = 1; k <= 40; k += 9) {
6452 for (uint32_t m = 1; m <= 2; m++) {
6453 GemmMicrokernelTester()
6454 .mr(2)
6455 .nr(4)
6456 .kr(2)
6457 .sr(1)
6458 .m(m)
6459 .n(n)
6460 .k(k)
6461 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006462 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006463 }
6464 }
6465 }
6466 }
6467
6468 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4) {
6469 TEST_REQUIRES_X86_XOP;
6470 for (uint32_t n = 8; n <= 12; n += 4) {
6471 for (size_t k = 1; k <= 40; k += 9) {
6472 GemmMicrokernelTester()
6473 .mr(2)
6474 .nr(4)
6475 .kr(2)
6476 .sr(1)
6477 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006478 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006479 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006480 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006481 }
6482 }
6483 }
6484
6485 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_strided_cn) {
6486 TEST_REQUIRES_X86_XOP;
6487 for (uint32_t n = 8; n <= 12; n += 4) {
6488 for (size_t k = 1; k <= 40; k += 9) {
6489 GemmMicrokernelTester()
6490 .mr(2)
6491 .nr(4)
6492 .kr(2)
6493 .sr(1)
6494 .m(2)
6495 .n(n)
6496 .k(k)
6497 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08006498 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006499 }
6500 }
6501 }
6502
6503 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_subtile) {
6504 TEST_REQUIRES_X86_XOP;
6505 for (uint32_t n = 8; n <= 12; n += 4) {
6506 for (size_t k = 1; k <= 40; k += 9) {
6507 for (uint32_t m = 1; m <= 2; m++) {
6508 GemmMicrokernelTester()
6509 .mr(2)
6510 .nr(4)
6511 .kr(2)
6512 .sr(1)
6513 .m(m)
6514 .n(n)
6515 .k(k)
6516 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006517 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006518 }
6519 }
6520 }
6521 }
6522
6523 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, small_kernel) {
6524 TEST_REQUIRES_X86_XOP;
6525 for (size_t k = 1; k <= 40; k += 9) {
6526 GemmMicrokernelTester()
6527 .mr(2)
6528 .nr(4)
6529 .kr(2)
6530 .sr(1)
6531 .m(2)
6532 .n(4)
6533 .k(k)
6534 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006535 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006536 }
6537 }
6538
6539 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, small_kernel_subtile) {
6540 TEST_REQUIRES_X86_XOP;
6541 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006542 for (uint32_t n = 1; n <= 4; n++) {
6543 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006544 GemmMicrokernelTester()
6545 .mr(2)
6546 .nr(4)
6547 .kr(2)
6548 .sr(1)
6549 .m(m)
6550 .n(n)
6551 .k(k)
6552 .ks(3)
6553 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006554 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006555 }
6556 }
6557 }
6558 }
6559
6560 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_small_kernel) {
6561 TEST_REQUIRES_X86_XOP;
6562 for (uint32_t n = 5; n < 8; n++) {
6563 for (size_t k = 1; k <= 40; k += 9) {
6564 GemmMicrokernelTester()
6565 .mr(2)
6566 .nr(4)
6567 .kr(2)
6568 .sr(1)
6569 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006570 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006571 .k(k)
6572 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006573 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006574 }
6575 }
6576 }
6577
6578 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_small_kernel) {
6579 TEST_REQUIRES_X86_XOP;
6580 for (uint32_t n = 8; n <= 12; n += 4) {
6581 for (size_t k = 1; k <= 40; k += 9) {
6582 GemmMicrokernelTester()
6583 .mr(2)
6584 .nr(4)
6585 .kr(2)
6586 .sr(1)
6587 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006588 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006589 .k(k)
6590 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006591 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006592 }
6593 }
6594 }
6595
6596 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cm_subtile) {
6597 TEST_REQUIRES_X86_XOP;
6598 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006599 for (uint32_t n = 1; n <= 4; n++) {
6600 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006601 GemmMicrokernelTester()
6602 .mr(2)
6603 .nr(4)
6604 .kr(2)
6605 .sr(1)
6606 .m(m)
6607 .n(n)
6608 .k(k)
6609 .cm_stride(7)
6610 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006611 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006612 }
6613 }
6614 }
6615 }
6616
6617 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, a_offset) {
6618 TEST_REQUIRES_X86_XOP;
6619 for (size_t k = 1; k <= 40; k += 9) {
6620 GemmMicrokernelTester()
6621 .mr(2)
6622 .nr(4)
6623 .kr(2)
6624 .sr(1)
6625 .m(2)
6626 .n(4)
6627 .k(k)
6628 .ks(3)
6629 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08006630 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006631 }
6632 }
6633
6634 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, zero) {
6635 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006636 for (size_t k = 1; k <= 40; k += 9) {
6637 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006638 GemmMicrokernelTester()
6639 .mr(2)
6640 .nr(4)
6641 .kr(2)
6642 .sr(1)
6643 .m(2)
6644 .n(4)
6645 .k(k)
6646 .ks(3)
6647 .a_offset(83)
6648 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08006649 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006650 }
6651 }
6652 }
6653
6654 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, qmin) {
6655 TEST_REQUIRES_X86_XOP;
6656 GemmMicrokernelTester()
6657 .mr(2)
6658 .nr(4)
6659 .kr(2)
6660 .sr(1)
6661 .m(2)
6662 .n(4)
6663 .k(8)
6664 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006665 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006666 }
6667
6668 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, qmax) {
6669 TEST_REQUIRES_X86_XOP;
6670 GemmMicrokernelTester()
6671 .mr(2)
6672 .nr(4)
6673 .kr(2)
6674 .sr(1)
6675 .m(2)
6676 .n(4)
6677 .k(8)
6678 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006679 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006680 }
6681
6682 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cm) {
6683 TEST_REQUIRES_X86_XOP;
6684 GemmMicrokernelTester()
6685 .mr(2)
6686 .nr(4)
6687 .kr(2)
6688 .sr(1)
6689 .m(2)
6690 .n(4)
6691 .k(8)
6692 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08006693 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006694 }
6695
6696 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, no_a_zero_point) {
6697 TEST_REQUIRES_X86_XOP;
6698 for (size_t k = 1; k <= 40; k += 9) {
6699 GemmMicrokernelTester()
6700 .mr(2)
6701 .nr(4)
6702 .kr(2)
6703 .sr(1)
6704 .m(2)
6705 .n(4)
6706 .k(k)
6707 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006708 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006709 }
6710 }
6711
6712 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, no_b_zero_point) {
6713 TEST_REQUIRES_X86_XOP;
6714 for (size_t k = 1; k <= 40; k += 9) {
6715 GemmMicrokernelTester()
6716 .mr(2)
6717 .nr(4)
6718 .kr(2)
6719 .sr(1)
6720 .m(2)
6721 .n(4)
6722 .k(k)
6723 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006724 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006725 }
6726 }
6727
6728 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, no_zero_point) {
6729 TEST_REQUIRES_X86_XOP;
6730 for (size_t k = 1; k <= 40; k += 9) {
6731 GemmMicrokernelTester()
6732 .mr(2)
6733 .nr(4)
6734 .kr(2)
6735 .sr(1)
6736 .m(2)
6737 .n(4)
6738 .k(k)
6739 .a_zero_point(0)
6740 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006741 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006742 }
6743 }
6744#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6745
6746
6747#if XNN_ARCH_X86 || XNN_ARCH_X86_64
6748 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8) {
6749 TEST_REQUIRES_X86_XOP;
6750 GemmMicrokernelTester()
6751 .mr(3)
6752 .nr(4)
6753 .kr(2)
6754 .sr(1)
6755 .m(3)
6756 .n(4)
6757 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08006758 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006759 }
6760
6761 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cn) {
6762 TEST_REQUIRES_X86_XOP;
6763 GemmMicrokernelTester()
6764 .mr(3)
6765 .nr(4)
6766 .kr(2)
6767 .sr(1)
6768 .m(3)
6769 .n(4)
6770 .k(8)
6771 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08006772 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006773 }
6774
6775 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile) {
6776 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006777 for (uint32_t n = 1; n <= 4; n++) {
6778 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006779 GemmMicrokernelTester()
6780 .mr(3)
6781 .nr(4)
6782 .kr(2)
6783 .sr(1)
6784 .m(m)
6785 .n(n)
6786 .k(8)
6787 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006788 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006789 }
6790 }
6791 }
6792
6793 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile_m) {
6794 TEST_REQUIRES_X86_XOP;
6795 for (uint32_t m = 1; m <= 3; m++) {
6796 GemmMicrokernelTester()
6797 .mr(3)
6798 .nr(4)
6799 .kr(2)
6800 .sr(1)
6801 .m(m)
6802 .n(4)
6803 .k(8)
6804 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006805 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006806 }
6807 }
6808
6809 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile_n) {
6810 TEST_REQUIRES_X86_XOP;
6811 for (uint32_t n = 1; n <= 4; n++) {
6812 GemmMicrokernelTester()
6813 .mr(3)
6814 .nr(4)
6815 .kr(2)
6816 .sr(1)
6817 .m(3)
6818 .n(n)
6819 .k(8)
6820 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006821 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006822 }
6823 }
6824
6825 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8) {
6826 TEST_REQUIRES_X86_XOP;
6827 for (size_t k = 1; k < 8; k++) {
6828 GemmMicrokernelTester()
6829 .mr(3)
6830 .nr(4)
6831 .kr(2)
6832 .sr(1)
6833 .m(3)
6834 .n(4)
6835 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006836 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006837 }
6838 }
6839
6840 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8_subtile) {
6841 TEST_REQUIRES_X86_XOP;
6842 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006843 for (uint32_t n = 1; n <= 4; n++) {
6844 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006845 GemmMicrokernelTester()
6846 .mr(3)
6847 .nr(4)
6848 .kr(2)
6849 .sr(1)
6850 .m(m)
6851 .n(n)
6852 .k(k)
6853 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006854 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006855 }
6856 }
6857 }
6858 }
6859
6860 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8) {
6861 TEST_REQUIRES_X86_XOP;
6862 for (size_t k = 9; k < 16; k++) {
6863 GemmMicrokernelTester()
6864 .mr(3)
6865 .nr(4)
6866 .kr(2)
6867 .sr(1)
6868 .m(3)
6869 .n(4)
6870 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006871 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006872 }
6873 }
6874
6875 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8_subtile) {
6876 TEST_REQUIRES_X86_XOP;
6877 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006878 for (uint32_t n = 1; n <= 4; n++) {
6879 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006880 GemmMicrokernelTester()
6881 .mr(3)
6882 .nr(4)
6883 .kr(2)
6884 .sr(1)
6885 .m(m)
6886 .n(n)
6887 .k(k)
6888 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006889 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006890 }
6891 }
6892 }
6893 }
6894
6895 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8) {
6896 TEST_REQUIRES_X86_XOP;
6897 for (size_t k = 16; k <= 80; k += 8) {
6898 GemmMicrokernelTester()
6899 .mr(3)
6900 .nr(4)
6901 .kr(2)
6902 .sr(1)
6903 .m(3)
6904 .n(4)
6905 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006906 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006907 }
6908 }
6909
6910 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8_subtile) {
6911 TEST_REQUIRES_X86_XOP;
6912 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006913 for (uint32_t n = 1; n <= 4; n++) {
6914 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006915 GemmMicrokernelTester()
6916 .mr(3)
6917 .nr(4)
6918 .kr(2)
6919 .sr(1)
6920 .m(m)
6921 .n(n)
6922 .k(k)
6923 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006924 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006925 }
6926 }
6927 }
6928 }
6929
6930 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4) {
6931 TEST_REQUIRES_X86_XOP;
6932 for (uint32_t n = 5; n < 8; n++) {
6933 for (size_t k = 1; k <= 40; k += 9) {
6934 GemmMicrokernelTester()
6935 .mr(3)
6936 .nr(4)
6937 .kr(2)
6938 .sr(1)
6939 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006940 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006941 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006942 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006943 }
6944 }
6945 }
6946
6947 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_strided_cn) {
6948 TEST_REQUIRES_X86_XOP;
6949 for (uint32_t n = 5; n < 8; n++) {
6950 for (size_t k = 1; k <= 40; k += 9) {
6951 GemmMicrokernelTester()
6952 .mr(3)
6953 .nr(4)
6954 .kr(2)
6955 .sr(1)
6956 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006957 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006958 .k(k)
6959 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08006960 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006961 }
6962 }
6963 }
6964
6965 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_subtile) {
6966 TEST_REQUIRES_X86_XOP;
6967 for (uint32_t n = 5; n < 8; n++) {
6968 for (size_t k = 1; k <= 40; k += 9) {
6969 for (uint32_t m = 1; m <= 3; m++) {
6970 GemmMicrokernelTester()
6971 .mr(3)
6972 .nr(4)
6973 .kr(2)
6974 .sr(1)
6975 .m(m)
6976 .n(n)
6977 .k(k)
6978 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006979 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006980 }
6981 }
6982 }
6983 }
6984
6985 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4) {
6986 TEST_REQUIRES_X86_XOP;
6987 for (uint32_t n = 8; n <= 12; n += 4) {
6988 for (size_t k = 1; k <= 40; k += 9) {
6989 GemmMicrokernelTester()
6990 .mr(3)
6991 .nr(4)
6992 .kr(2)
6993 .sr(1)
6994 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006995 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006996 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006997 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07006998 }
6999 }
7000 }
7001
7002 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_strided_cn) {
7003 TEST_REQUIRES_X86_XOP;
7004 for (uint32_t n = 8; n <= 12; n += 4) {
7005 for (size_t k = 1; k <= 40; k += 9) {
7006 GemmMicrokernelTester()
7007 .mr(3)
7008 .nr(4)
7009 .kr(2)
7010 .sr(1)
7011 .m(3)
7012 .n(n)
7013 .k(k)
7014 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08007015 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007016 }
7017 }
7018 }
7019
7020 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_subtile) {
7021 TEST_REQUIRES_X86_XOP;
7022 for (uint32_t n = 8; n <= 12; n += 4) {
7023 for (size_t k = 1; k <= 40; k += 9) {
7024 for (uint32_t m = 1; m <= 3; m++) {
7025 GemmMicrokernelTester()
7026 .mr(3)
7027 .nr(4)
7028 .kr(2)
7029 .sr(1)
7030 .m(m)
7031 .n(n)
7032 .k(k)
7033 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007034 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007035 }
7036 }
7037 }
7038 }
7039
7040 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, small_kernel) {
7041 TEST_REQUIRES_X86_XOP;
7042 for (size_t k = 1; k <= 40; k += 9) {
7043 GemmMicrokernelTester()
7044 .mr(3)
7045 .nr(4)
7046 .kr(2)
7047 .sr(1)
7048 .m(3)
7049 .n(4)
7050 .k(k)
7051 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007052 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007053 }
7054 }
7055
7056 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, small_kernel_subtile) {
7057 TEST_REQUIRES_X86_XOP;
7058 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007059 for (uint32_t n = 1; n <= 4; n++) {
7060 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007061 GemmMicrokernelTester()
7062 .mr(3)
7063 .nr(4)
7064 .kr(2)
7065 .sr(1)
7066 .m(m)
7067 .n(n)
7068 .k(k)
7069 .ks(3)
7070 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007071 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007072 }
7073 }
7074 }
7075 }
7076
7077 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_small_kernel) {
7078 TEST_REQUIRES_X86_XOP;
7079 for (uint32_t n = 5; n < 8; n++) {
7080 for (size_t k = 1; k <= 40; k += 9) {
7081 GemmMicrokernelTester()
7082 .mr(3)
7083 .nr(4)
7084 .kr(2)
7085 .sr(1)
7086 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007087 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007088 .k(k)
7089 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007090 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007091 }
7092 }
7093 }
7094
7095 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_small_kernel) {
7096 TEST_REQUIRES_X86_XOP;
7097 for (uint32_t n = 8; n <= 12; n += 4) {
7098 for (size_t k = 1; k <= 40; k += 9) {
7099 GemmMicrokernelTester()
7100 .mr(3)
7101 .nr(4)
7102 .kr(2)
7103 .sr(1)
7104 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007105 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007106 .k(k)
7107 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007108 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007109 }
7110 }
7111 }
7112
7113 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cm_subtile) {
7114 TEST_REQUIRES_X86_XOP;
7115 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007116 for (uint32_t n = 1; n <= 4; n++) {
7117 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007118 GemmMicrokernelTester()
7119 .mr(3)
7120 .nr(4)
7121 .kr(2)
7122 .sr(1)
7123 .m(m)
7124 .n(n)
7125 .k(k)
7126 .cm_stride(7)
7127 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007128 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007129 }
7130 }
7131 }
7132 }
7133
7134 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, a_offset) {
7135 TEST_REQUIRES_X86_XOP;
7136 for (size_t k = 1; k <= 40; k += 9) {
7137 GemmMicrokernelTester()
7138 .mr(3)
7139 .nr(4)
7140 .kr(2)
7141 .sr(1)
7142 .m(3)
7143 .n(4)
7144 .k(k)
7145 .ks(3)
7146 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08007147 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007148 }
7149 }
7150
7151 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, zero) {
7152 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007153 for (size_t k = 1; k <= 40; k += 9) {
7154 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007155 GemmMicrokernelTester()
7156 .mr(3)
7157 .nr(4)
7158 .kr(2)
7159 .sr(1)
7160 .m(3)
7161 .n(4)
7162 .k(k)
7163 .ks(3)
7164 .a_offset(127)
7165 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08007166 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007167 }
7168 }
7169 }
7170
7171 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, qmin) {
7172 TEST_REQUIRES_X86_XOP;
7173 GemmMicrokernelTester()
7174 .mr(3)
7175 .nr(4)
7176 .kr(2)
7177 .sr(1)
7178 .m(3)
7179 .n(4)
7180 .k(8)
7181 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007182 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007183 }
7184
7185 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, qmax) {
7186 TEST_REQUIRES_X86_XOP;
7187 GemmMicrokernelTester()
7188 .mr(3)
7189 .nr(4)
7190 .kr(2)
7191 .sr(1)
7192 .m(3)
7193 .n(4)
7194 .k(8)
7195 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007196 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007197 }
7198
7199 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cm) {
7200 TEST_REQUIRES_X86_XOP;
7201 GemmMicrokernelTester()
7202 .mr(3)
7203 .nr(4)
7204 .kr(2)
7205 .sr(1)
7206 .m(3)
7207 .n(4)
7208 .k(8)
7209 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08007210 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007211 }
7212
7213 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, no_a_zero_point) {
7214 TEST_REQUIRES_X86_XOP;
7215 for (size_t k = 1; k <= 40; k += 9) {
7216 GemmMicrokernelTester()
7217 .mr(3)
7218 .nr(4)
7219 .kr(2)
7220 .sr(1)
7221 .m(3)
7222 .n(4)
7223 .k(k)
7224 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007225 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007226 }
7227 }
7228
7229 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, no_b_zero_point) {
7230 TEST_REQUIRES_X86_XOP;
7231 for (size_t k = 1; k <= 40; k += 9) {
7232 GemmMicrokernelTester()
7233 .mr(3)
7234 .nr(4)
7235 .kr(2)
7236 .sr(1)
7237 .m(3)
7238 .n(4)
7239 .k(k)
7240 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007241 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007242 }
7243 }
7244
7245 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, no_zero_point) {
7246 TEST_REQUIRES_X86_XOP;
7247 for (size_t k = 1; k <= 40; k += 9) {
7248 GemmMicrokernelTester()
7249 .mr(3)
7250 .nr(4)
7251 .kr(2)
7252 .sr(1)
7253 .m(3)
7254 .n(4)
7255 .k(k)
7256 .a_zero_point(0)
7257 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007258 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007259 }
7260 }
7261#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7262
7263
7264#if XNN_ARCH_X86 || XNN_ARCH_X86_64
7265 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8) {
7266 TEST_REQUIRES_X86_XOP;
7267 GemmMicrokernelTester()
7268 .mr(4)
7269 .nr(4)
7270 .kr(2)
7271 .sr(1)
7272 .m(4)
7273 .n(4)
7274 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08007275 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007276 }
7277
7278 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cn) {
7279 TEST_REQUIRES_X86_XOP;
7280 GemmMicrokernelTester()
7281 .mr(4)
7282 .nr(4)
7283 .kr(2)
7284 .sr(1)
7285 .m(4)
7286 .n(4)
7287 .k(8)
7288 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08007289 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007290 }
7291
7292 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile) {
7293 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007294 for (uint32_t n = 1; n <= 4; n++) {
7295 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007296 GemmMicrokernelTester()
7297 .mr(4)
7298 .nr(4)
7299 .kr(2)
7300 .sr(1)
7301 .m(m)
7302 .n(n)
7303 .k(8)
7304 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007305 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007306 }
7307 }
7308 }
7309
7310 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile_m) {
7311 TEST_REQUIRES_X86_XOP;
7312 for (uint32_t m = 1; m <= 4; m++) {
7313 GemmMicrokernelTester()
7314 .mr(4)
7315 .nr(4)
7316 .kr(2)
7317 .sr(1)
7318 .m(m)
7319 .n(4)
7320 .k(8)
7321 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007322 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007323 }
7324 }
7325
7326 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile_n) {
7327 TEST_REQUIRES_X86_XOP;
7328 for (uint32_t n = 1; n <= 4; n++) {
7329 GemmMicrokernelTester()
7330 .mr(4)
7331 .nr(4)
7332 .kr(2)
7333 .sr(1)
7334 .m(4)
7335 .n(n)
7336 .k(8)
7337 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007338 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007339 }
7340 }
7341
7342 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8) {
7343 TEST_REQUIRES_X86_XOP;
7344 for (size_t k = 1; k < 8; k++) {
7345 GemmMicrokernelTester()
7346 .mr(4)
7347 .nr(4)
7348 .kr(2)
7349 .sr(1)
7350 .m(4)
7351 .n(4)
7352 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007353 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007354 }
7355 }
7356
7357 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8_subtile) {
7358 TEST_REQUIRES_X86_XOP;
7359 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007360 for (uint32_t n = 1; n <= 4; n++) {
7361 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007362 GemmMicrokernelTester()
7363 .mr(4)
7364 .nr(4)
7365 .kr(2)
7366 .sr(1)
7367 .m(m)
7368 .n(n)
7369 .k(k)
7370 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007371 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007372 }
7373 }
7374 }
7375 }
7376
7377 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8) {
7378 TEST_REQUIRES_X86_XOP;
7379 for (size_t k = 9; k < 16; k++) {
7380 GemmMicrokernelTester()
7381 .mr(4)
7382 .nr(4)
7383 .kr(2)
7384 .sr(1)
7385 .m(4)
7386 .n(4)
7387 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007388 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007389 }
7390 }
7391
7392 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8_subtile) {
7393 TEST_REQUIRES_X86_XOP;
7394 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007395 for (uint32_t n = 1; n <= 4; n++) {
7396 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007397 GemmMicrokernelTester()
7398 .mr(4)
7399 .nr(4)
7400 .kr(2)
7401 .sr(1)
7402 .m(m)
7403 .n(n)
7404 .k(k)
7405 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007406 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007407 }
7408 }
7409 }
7410 }
7411
7412 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8) {
7413 TEST_REQUIRES_X86_XOP;
7414 for (size_t k = 16; k <= 80; k += 8) {
7415 GemmMicrokernelTester()
7416 .mr(4)
7417 .nr(4)
7418 .kr(2)
7419 .sr(1)
7420 .m(4)
7421 .n(4)
7422 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007423 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007424 }
7425 }
7426
7427 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8_subtile) {
7428 TEST_REQUIRES_X86_XOP;
7429 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007430 for (uint32_t n = 1; n <= 4; n++) {
7431 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007432 GemmMicrokernelTester()
7433 .mr(4)
7434 .nr(4)
7435 .kr(2)
7436 .sr(1)
7437 .m(m)
7438 .n(n)
7439 .k(k)
7440 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007441 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007442 }
7443 }
7444 }
7445 }
7446
7447 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4) {
7448 TEST_REQUIRES_X86_XOP;
7449 for (uint32_t n = 5; n < 8; n++) {
7450 for (size_t k = 1; k <= 40; k += 9) {
7451 GemmMicrokernelTester()
7452 .mr(4)
7453 .nr(4)
7454 .kr(2)
7455 .sr(1)
7456 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007457 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007458 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007459 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007460 }
7461 }
7462 }
7463
7464 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_strided_cn) {
7465 TEST_REQUIRES_X86_XOP;
7466 for (uint32_t n = 5; n < 8; n++) {
7467 for (size_t k = 1; k <= 40; k += 9) {
7468 GemmMicrokernelTester()
7469 .mr(4)
7470 .nr(4)
7471 .kr(2)
7472 .sr(1)
7473 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007474 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007475 .k(k)
7476 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08007477 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007478 }
7479 }
7480 }
7481
7482 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_subtile) {
7483 TEST_REQUIRES_X86_XOP;
7484 for (uint32_t n = 5; n < 8; n++) {
7485 for (size_t k = 1; k <= 40; k += 9) {
7486 for (uint32_t m = 1; m <= 4; m++) {
7487 GemmMicrokernelTester()
7488 .mr(4)
7489 .nr(4)
7490 .kr(2)
7491 .sr(1)
7492 .m(m)
7493 .n(n)
7494 .k(k)
7495 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007496 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007497 }
7498 }
7499 }
7500 }
7501
7502 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4) {
7503 TEST_REQUIRES_X86_XOP;
7504 for (uint32_t n = 8; n <= 12; n += 4) {
7505 for (size_t k = 1; k <= 40; k += 9) {
7506 GemmMicrokernelTester()
7507 .mr(4)
7508 .nr(4)
7509 .kr(2)
7510 .sr(1)
7511 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007512 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007513 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007514 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007515 }
7516 }
7517 }
7518
7519 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_strided_cn) {
7520 TEST_REQUIRES_X86_XOP;
7521 for (uint32_t n = 8; n <= 12; n += 4) {
7522 for (size_t k = 1; k <= 40; k += 9) {
7523 GemmMicrokernelTester()
7524 .mr(4)
7525 .nr(4)
7526 .kr(2)
7527 .sr(1)
7528 .m(4)
7529 .n(n)
7530 .k(k)
7531 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08007532 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007533 }
7534 }
7535 }
7536
7537 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_subtile) {
7538 TEST_REQUIRES_X86_XOP;
7539 for (uint32_t n = 8; n <= 12; n += 4) {
7540 for (size_t k = 1; k <= 40; k += 9) {
7541 for (uint32_t m = 1; m <= 4; m++) {
7542 GemmMicrokernelTester()
7543 .mr(4)
7544 .nr(4)
7545 .kr(2)
7546 .sr(1)
7547 .m(m)
7548 .n(n)
7549 .k(k)
7550 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007551 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007552 }
7553 }
7554 }
7555 }
7556
7557 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, small_kernel) {
7558 TEST_REQUIRES_X86_XOP;
7559 for (size_t k = 1; k <= 40; k += 9) {
7560 GemmMicrokernelTester()
7561 .mr(4)
7562 .nr(4)
7563 .kr(2)
7564 .sr(1)
7565 .m(4)
7566 .n(4)
7567 .k(k)
7568 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007569 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007570 }
7571 }
7572
7573 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, small_kernel_subtile) {
7574 TEST_REQUIRES_X86_XOP;
7575 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007576 for (uint32_t n = 1; n <= 4; n++) {
7577 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007578 GemmMicrokernelTester()
7579 .mr(4)
7580 .nr(4)
7581 .kr(2)
7582 .sr(1)
7583 .m(m)
7584 .n(n)
7585 .k(k)
7586 .ks(3)
7587 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007588 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007589 }
7590 }
7591 }
7592 }
7593
7594 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_small_kernel) {
7595 TEST_REQUIRES_X86_XOP;
7596 for (uint32_t n = 5; n < 8; n++) {
7597 for (size_t k = 1; k <= 40; k += 9) {
7598 GemmMicrokernelTester()
7599 .mr(4)
7600 .nr(4)
7601 .kr(2)
7602 .sr(1)
7603 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007604 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007605 .k(k)
7606 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007607 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007608 }
7609 }
7610 }
7611
7612 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_small_kernel) {
7613 TEST_REQUIRES_X86_XOP;
7614 for (uint32_t n = 8; n <= 12; n += 4) {
7615 for (size_t k = 1; k <= 40; k += 9) {
7616 GemmMicrokernelTester()
7617 .mr(4)
7618 .nr(4)
7619 .kr(2)
7620 .sr(1)
7621 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007622 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007623 .k(k)
7624 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007625 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007626 }
7627 }
7628 }
7629
7630 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cm_subtile) {
7631 TEST_REQUIRES_X86_XOP;
7632 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007633 for (uint32_t n = 1; n <= 4; n++) {
7634 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007635 GemmMicrokernelTester()
7636 .mr(4)
7637 .nr(4)
7638 .kr(2)
7639 .sr(1)
7640 .m(m)
7641 .n(n)
7642 .k(k)
7643 .cm_stride(7)
7644 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007645 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007646 }
7647 }
7648 }
7649 }
7650
7651 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, a_offset) {
7652 TEST_REQUIRES_X86_XOP;
7653 for (size_t k = 1; k <= 40; k += 9) {
7654 GemmMicrokernelTester()
7655 .mr(4)
7656 .nr(4)
7657 .kr(2)
7658 .sr(1)
7659 .m(4)
7660 .n(4)
7661 .k(k)
7662 .ks(3)
7663 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08007664 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007665 }
7666 }
7667
7668 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, zero) {
7669 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007670 for (size_t k = 1; k <= 40; k += 9) {
7671 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007672 GemmMicrokernelTester()
7673 .mr(4)
7674 .nr(4)
7675 .kr(2)
7676 .sr(1)
7677 .m(4)
7678 .n(4)
7679 .k(k)
7680 .ks(3)
7681 .a_offset(163)
7682 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08007683 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007684 }
7685 }
7686 }
7687
7688 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, qmin) {
7689 TEST_REQUIRES_X86_XOP;
7690 GemmMicrokernelTester()
7691 .mr(4)
7692 .nr(4)
7693 .kr(2)
7694 .sr(1)
7695 .m(4)
7696 .n(4)
7697 .k(8)
7698 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007699 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007700 }
7701
7702 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, qmax) {
7703 TEST_REQUIRES_X86_XOP;
7704 GemmMicrokernelTester()
7705 .mr(4)
7706 .nr(4)
7707 .kr(2)
7708 .sr(1)
7709 .m(4)
7710 .n(4)
7711 .k(8)
7712 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007713 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007714 }
7715
7716 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cm) {
7717 TEST_REQUIRES_X86_XOP;
7718 GemmMicrokernelTester()
7719 .mr(4)
7720 .nr(4)
7721 .kr(2)
7722 .sr(1)
7723 .m(4)
7724 .n(4)
7725 .k(8)
7726 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08007727 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007728 }
7729
7730 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, no_a_zero_point) {
7731 TEST_REQUIRES_X86_XOP;
7732 for (size_t k = 1; k <= 40; k += 9) {
7733 GemmMicrokernelTester()
7734 .mr(4)
7735 .nr(4)
7736 .kr(2)
7737 .sr(1)
7738 .m(4)
7739 .n(4)
7740 .k(k)
7741 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007742 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007743 }
7744 }
7745
7746 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, no_b_zero_point) {
7747 TEST_REQUIRES_X86_XOP;
7748 for (size_t k = 1; k <= 40; k += 9) {
7749 GemmMicrokernelTester()
7750 .mr(4)
7751 .nr(4)
7752 .kr(2)
7753 .sr(1)
7754 .m(4)
7755 .n(4)
7756 .k(k)
7757 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007758 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007759 }
7760 }
7761
7762 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, no_zero_point) {
7763 TEST_REQUIRES_X86_XOP;
7764 for (size_t k = 1; k <= 40; k += 9) {
7765 GemmMicrokernelTester()
7766 .mr(4)
7767 .nr(4)
7768 .kr(2)
7769 .sr(1)
7770 .m(4)
7771 .n(4)
7772 .k(k)
7773 .a_zero_point(0)
7774 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007775 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007776 }
7777 }
7778#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7779
7780
7781#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007782 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8) {
7783 TEST_REQUIRES_X86_SSE2;
7784 GemmMicrokernelTester()
7785 .mr(3)
7786 .nr(4)
7787 .kr(2)
7788 .sr(1)
7789 .m(3)
7790 .n(4)
7791 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08007792 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007793 }
7794
7795 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, strided_cn) {
7796 TEST_REQUIRES_X86_SSE2;
7797 GemmMicrokernelTester()
7798 .mr(3)
7799 .nr(4)
7800 .kr(2)
7801 .sr(1)
7802 .m(3)
7803 .n(4)
7804 .k(8)
7805 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08007806 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007807 }
7808
7809 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_subtile) {
7810 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007811 for (uint32_t n = 1; n <= 4; n++) {
7812 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007813 GemmMicrokernelTester()
7814 .mr(3)
7815 .nr(4)
7816 .kr(2)
7817 .sr(1)
7818 .m(m)
7819 .n(n)
7820 .k(8)
7821 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007822 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007823 }
7824 }
7825 }
7826
7827 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_subtile_m) {
7828 TEST_REQUIRES_X86_SSE2;
7829 for (uint32_t m = 1; m <= 3; m++) {
7830 GemmMicrokernelTester()
7831 .mr(3)
7832 .nr(4)
7833 .kr(2)
7834 .sr(1)
7835 .m(m)
7836 .n(4)
7837 .k(8)
7838 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007839 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007840 }
7841 }
7842
7843 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_subtile_n) {
7844 TEST_REQUIRES_X86_SSE2;
7845 for (uint32_t n = 1; n <= 4; n++) {
7846 GemmMicrokernelTester()
7847 .mr(3)
7848 .nr(4)
7849 .kr(2)
7850 .sr(1)
7851 .m(3)
7852 .n(n)
7853 .k(8)
7854 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007855 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007856 }
7857 }
7858
7859 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_lt_8) {
7860 TEST_REQUIRES_X86_SSE2;
7861 for (size_t k = 1; k < 8; k++) {
7862 GemmMicrokernelTester()
7863 .mr(3)
7864 .nr(4)
7865 .kr(2)
7866 .sr(1)
7867 .m(3)
7868 .n(4)
7869 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007870 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007871 }
7872 }
7873
7874 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_lt_8_subtile) {
7875 TEST_REQUIRES_X86_SSE2;
7876 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007877 for (uint32_t n = 1; n <= 4; n++) {
7878 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007879 GemmMicrokernelTester()
7880 .mr(3)
7881 .nr(4)
7882 .kr(2)
7883 .sr(1)
7884 .m(m)
7885 .n(n)
7886 .k(k)
7887 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007888 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007889 }
7890 }
7891 }
7892 }
7893
7894 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_gt_8) {
7895 TEST_REQUIRES_X86_SSE2;
7896 for (size_t k = 9; k < 16; k++) {
7897 GemmMicrokernelTester()
7898 .mr(3)
7899 .nr(4)
7900 .kr(2)
7901 .sr(1)
7902 .m(3)
7903 .n(4)
7904 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007905 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007906 }
7907 }
7908
7909 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_gt_8_subtile) {
7910 TEST_REQUIRES_X86_SSE2;
7911 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007912 for (uint32_t n = 1; n <= 4; n++) {
7913 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007914 GemmMicrokernelTester()
7915 .mr(3)
7916 .nr(4)
7917 .kr(2)
7918 .sr(1)
7919 .m(m)
7920 .n(n)
7921 .k(k)
7922 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007923 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007924 }
7925 }
7926 }
7927 }
7928
7929 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_div_8) {
7930 TEST_REQUIRES_X86_SSE2;
7931 for (size_t k = 16; k <= 80; k += 8) {
7932 GemmMicrokernelTester()
7933 .mr(3)
7934 .nr(4)
7935 .kr(2)
7936 .sr(1)
7937 .m(3)
7938 .n(4)
7939 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007940 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007941 }
7942 }
7943
7944 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_div_8_subtile) {
7945 TEST_REQUIRES_X86_SSE2;
7946 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007947 for (uint32_t n = 1; n <= 4; n++) {
7948 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007949 GemmMicrokernelTester()
7950 .mr(3)
7951 .nr(4)
7952 .kr(2)
7953 .sr(1)
7954 .m(m)
7955 .n(n)
7956 .k(k)
7957 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007958 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007959 }
7960 }
7961 }
7962 }
7963
7964 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4) {
7965 TEST_REQUIRES_X86_SSE2;
7966 for (uint32_t n = 5; n < 8; n++) {
7967 for (size_t k = 1; k <= 40; k += 9) {
7968 GemmMicrokernelTester()
7969 .mr(3)
7970 .nr(4)
7971 .kr(2)
7972 .sr(1)
7973 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007974 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007975 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007976 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007977 }
7978 }
7979 }
7980
7981 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4_strided_cn) {
7982 TEST_REQUIRES_X86_SSE2;
7983 for (uint32_t n = 5; n < 8; n++) {
7984 for (size_t k = 1; k <= 40; k += 9) {
7985 GemmMicrokernelTester()
7986 .mr(3)
7987 .nr(4)
7988 .kr(2)
7989 .sr(1)
7990 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007991 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007992 .k(k)
7993 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08007994 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07007995 }
7996 }
7997 }
7998
7999 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4_subtile) {
8000 TEST_REQUIRES_X86_SSE2;
8001 for (uint32_t n = 5; n < 8; n++) {
8002 for (size_t k = 1; k <= 40; k += 9) {
8003 for (uint32_t m = 1; m <= 3; m++) {
8004 GemmMicrokernelTester()
8005 .mr(3)
8006 .nr(4)
8007 .kr(2)
8008 .sr(1)
8009 .m(m)
8010 .n(n)
8011 .k(k)
8012 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008013 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008014 }
8015 }
8016 }
8017 }
8018
8019 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4) {
8020 TEST_REQUIRES_X86_SSE2;
8021 for (uint32_t n = 8; n <= 12; n += 4) {
8022 for (size_t k = 1; k <= 40; k += 9) {
8023 GemmMicrokernelTester()
8024 .mr(3)
8025 .nr(4)
8026 .kr(2)
8027 .sr(1)
8028 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008029 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008030 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008031 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008032 }
8033 }
8034 }
8035
8036 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4_strided_cn) {
8037 TEST_REQUIRES_X86_SSE2;
8038 for (uint32_t n = 8; n <= 12; n += 4) {
8039 for (size_t k = 1; k <= 40; k += 9) {
8040 GemmMicrokernelTester()
8041 .mr(3)
8042 .nr(4)
8043 .kr(2)
8044 .sr(1)
8045 .m(3)
8046 .n(n)
8047 .k(k)
8048 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08008049 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008050 }
8051 }
8052 }
8053
8054 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4_subtile) {
8055 TEST_REQUIRES_X86_SSE2;
8056 for (uint32_t n = 8; n <= 12; n += 4) {
8057 for (size_t k = 1; k <= 40; k += 9) {
8058 for (uint32_t m = 1; m <= 3; m++) {
8059 GemmMicrokernelTester()
8060 .mr(3)
8061 .nr(4)
8062 .kr(2)
8063 .sr(1)
8064 .m(m)
8065 .n(n)
8066 .k(k)
8067 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008068 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008069 }
8070 }
8071 }
8072 }
8073
8074 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, small_kernel) {
8075 TEST_REQUIRES_X86_SSE2;
8076 for (size_t k = 1; k <= 40; k += 9) {
8077 GemmMicrokernelTester()
8078 .mr(3)
8079 .nr(4)
8080 .kr(2)
8081 .sr(1)
8082 .m(3)
8083 .n(4)
8084 .k(k)
8085 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008086 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008087 }
8088 }
8089
8090 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, small_kernel_subtile) {
8091 TEST_REQUIRES_X86_SSE2;
8092 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008093 for (uint32_t n = 1; n <= 4; n++) {
8094 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008095 GemmMicrokernelTester()
8096 .mr(3)
8097 .nr(4)
8098 .kr(2)
8099 .sr(1)
8100 .m(m)
8101 .n(n)
8102 .k(k)
8103 .ks(3)
8104 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008105 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008106 }
8107 }
8108 }
8109 }
8110
8111 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4_small_kernel) {
8112 TEST_REQUIRES_X86_SSE2;
8113 for (uint32_t n = 5; n < 8; n++) {
8114 for (size_t k = 1; k <= 40; k += 9) {
8115 GemmMicrokernelTester()
8116 .mr(3)
8117 .nr(4)
8118 .kr(2)
8119 .sr(1)
8120 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008121 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008122 .k(k)
8123 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008124 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008125 }
8126 }
8127 }
8128
8129 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4_small_kernel) {
8130 TEST_REQUIRES_X86_SSE2;
8131 for (uint32_t n = 8; n <= 12; n += 4) {
8132 for (size_t k = 1; k <= 40; k += 9) {
8133 GemmMicrokernelTester()
8134 .mr(3)
8135 .nr(4)
8136 .kr(2)
8137 .sr(1)
8138 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008139 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008140 .k(k)
8141 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008142 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008143 }
8144 }
8145 }
8146
8147 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, strided_cm_subtile) {
8148 TEST_REQUIRES_X86_SSE2;
8149 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008150 for (uint32_t n = 1; n <= 4; n++) {
8151 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008152 GemmMicrokernelTester()
8153 .mr(3)
8154 .nr(4)
8155 .kr(2)
8156 .sr(1)
8157 .m(m)
8158 .n(n)
8159 .k(k)
8160 .cm_stride(7)
8161 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008162 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008163 }
8164 }
8165 }
8166 }
8167
8168 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, a_offset) {
8169 TEST_REQUIRES_X86_SSE2;
8170 for (size_t k = 1; k <= 40; k += 9) {
8171 GemmMicrokernelTester()
8172 .mr(3)
8173 .nr(4)
8174 .kr(2)
8175 .sr(1)
8176 .m(3)
8177 .n(4)
8178 .k(k)
8179 .ks(3)
8180 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08008181 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008182 }
8183 }
8184
8185 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, zero) {
8186 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008187 for (size_t k = 1; k <= 40; k += 9) {
8188 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008189 GemmMicrokernelTester()
8190 .mr(3)
8191 .nr(4)
8192 .kr(2)
8193 .sr(1)
8194 .m(3)
8195 .n(4)
8196 .k(k)
8197 .ks(3)
8198 .a_offset(127)
8199 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08008200 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008201 }
8202 }
8203 }
8204
8205 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, qmin) {
8206 TEST_REQUIRES_X86_SSE2;
8207 GemmMicrokernelTester()
8208 .mr(3)
8209 .nr(4)
8210 .kr(2)
8211 .sr(1)
8212 .m(3)
8213 .n(4)
8214 .k(8)
8215 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008216 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008217 }
8218
8219 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, qmax) {
8220 TEST_REQUIRES_X86_SSE2;
8221 GemmMicrokernelTester()
8222 .mr(3)
8223 .nr(4)
8224 .kr(2)
8225 .sr(1)
8226 .m(3)
8227 .n(4)
8228 .k(8)
8229 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008230 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008231 }
8232
8233 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, strided_cm) {
8234 TEST_REQUIRES_X86_SSE2;
8235 GemmMicrokernelTester()
8236 .mr(3)
8237 .nr(4)
8238 .kr(2)
8239 .sr(1)
8240 .m(3)
8241 .n(4)
8242 .k(8)
8243 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08008244 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008245 }
8246
8247 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, no_a_zero_point) {
8248 TEST_REQUIRES_X86_SSE2;
8249 for (size_t k = 1; k <= 40; k += 9) {
8250 GemmMicrokernelTester()
8251 .mr(3)
8252 .nr(4)
8253 .kr(2)
8254 .sr(1)
8255 .m(3)
8256 .n(4)
8257 .k(k)
8258 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008259 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008260 }
8261 }
8262
8263 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, no_b_zero_point) {
8264 TEST_REQUIRES_X86_SSE2;
8265 for (size_t k = 1; k <= 40; k += 9) {
8266 GemmMicrokernelTester()
8267 .mr(3)
8268 .nr(4)
8269 .kr(2)
8270 .sr(1)
8271 .m(3)
8272 .n(4)
8273 .k(k)
8274 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008275 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008276 }
8277 }
8278
8279 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, no_zero_point) {
8280 TEST_REQUIRES_X86_SSE2;
8281 for (size_t k = 1; k <= 40; k += 9) {
8282 GemmMicrokernelTester()
8283 .mr(3)
8284 .nr(4)
8285 .kr(2)
8286 .sr(1)
8287 .m(3)
8288 .n(4)
8289 .k(k)
8290 .a_zero_point(0)
8291 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008292 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008293 }
8294 }
8295#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8296
8297
8298#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008299 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8) {
8300 TEST_REQUIRES_X86_SSE41;
8301 GemmMicrokernelTester()
8302 .mr(3)
8303 .nr(4)
8304 .kr(2)
8305 .sr(1)
8306 .m(3)
8307 .n(4)
8308 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08008309 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008310 }
8311
8312 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cn) {
8313 TEST_REQUIRES_X86_SSE41;
8314 GemmMicrokernelTester()
8315 .mr(3)
8316 .nr(4)
8317 .kr(2)
8318 .sr(1)
8319 .m(3)
8320 .n(4)
8321 .k(8)
8322 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08008323 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008324 }
8325
8326 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile) {
8327 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008328 for (uint32_t n = 1; n <= 4; n++) {
8329 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008330 GemmMicrokernelTester()
8331 .mr(3)
8332 .nr(4)
8333 .kr(2)
8334 .sr(1)
8335 .m(m)
8336 .n(n)
8337 .k(8)
8338 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008339 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008340 }
8341 }
8342 }
8343
8344 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile_m) {
8345 TEST_REQUIRES_X86_SSE41;
8346 for (uint32_t m = 1; m <= 3; m++) {
8347 GemmMicrokernelTester()
8348 .mr(3)
8349 .nr(4)
8350 .kr(2)
8351 .sr(1)
8352 .m(m)
8353 .n(4)
8354 .k(8)
8355 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008356 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008357 }
8358 }
8359
8360 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile_n) {
8361 TEST_REQUIRES_X86_SSE41;
8362 for (uint32_t n = 1; n <= 4; n++) {
8363 GemmMicrokernelTester()
8364 .mr(3)
8365 .nr(4)
8366 .kr(2)
8367 .sr(1)
8368 .m(3)
8369 .n(n)
8370 .k(8)
8371 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008372 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008373 }
8374 }
8375
8376 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_lt_8) {
8377 TEST_REQUIRES_X86_SSE41;
8378 for (size_t k = 1; k < 8; k++) {
8379 GemmMicrokernelTester()
8380 .mr(3)
8381 .nr(4)
8382 .kr(2)
8383 .sr(1)
8384 .m(3)
8385 .n(4)
8386 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008387 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008388 }
8389 }
8390
8391 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_lt_8_subtile) {
8392 TEST_REQUIRES_X86_SSE41;
8393 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008394 for (uint32_t n = 1; n <= 4; n++) {
8395 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008396 GemmMicrokernelTester()
8397 .mr(3)
8398 .nr(4)
8399 .kr(2)
8400 .sr(1)
8401 .m(m)
8402 .n(n)
8403 .k(k)
8404 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008405 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008406 }
8407 }
8408 }
8409 }
8410
8411 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_gt_8) {
8412 TEST_REQUIRES_X86_SSE41;
8413 for (size_t k = 9; k < 16; k++) {
8414 GemmMicrokernelTester()
8415 .mr(3)
8416 .nr(4)
8417 .kr(2)
8418 .sr(1)
8419 .m(3)
8420 .n(4)
8421 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008422 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008423 }
8424 }
8425
8426 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_gt_8_subtile) {
8427 TEST_REQUIRES_X86_SSE41;
8428 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008429 for (uint32_t n = 1; n <= 4; n++) {
8430 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008431 GemmMicrokernelTester()
8432 .mr(3)
8433 .nr(4)
8434 .kr(2)
8435 .sr(1)
8436 .m(m)
8437 .n(n)
8438 .k(k)
8439 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008440 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008441 }
8442 }
8443 }
8444 }
8445
8446 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_div_8) {
8447 TEST_REQUIRES_X86_SSE41;
8448 for (size_t k = 16; k <= 80; k += 8) {
8449 GemmMicrokernelTester()
8450 .mr(3)
8451 .nr(4)
8452 .kr(2)
8453 .sr(1)
8454 .m(3)
8455 .n(4)
8456 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008457 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008458 }
8459 }
8460
8461 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_div_8_subtile) {
8462 TEST_REQUIRES_X86_SSE41;
8463 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008464 for (uint32_t n = 1; n <= 4; n++) {
8465 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008466 GemmMicrokernelTester()
8467 .mr(3)
8468 .nr(4)
8469 .kr(2)
8470 .sr(1)
8471 .m(m)
8472 .n(n)
8473 .k(k)
8474 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008475 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008476 }
8477 }
8478 }
8479 }
8480
8481 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4) {
8482 TEST_REQUIRES_X86_SSE41;
8483 for (uint32_t n = 5; n < 8; n++) {
8484 for (size_t k = 1; k <= 40; k += 9) {
8485 GemmMicrokernelTester()
8486 .mr(3)
8487 .nr(4)
8488 .kr(2)
8489 .sr(1)
8490 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008491 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008492 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008493 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008494 }
8495 }
8496 }
8497
8498 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_strided_cn) {
8499 TEST_REQUIRES_X86_SSE41;
8500 for (uint32_t n = 5; n < 8; n++) {
8501 for (size_t k = 1; k <= 40; k += 9) {
8502 GemmMicrokernelTester()
8503 .mr(3)
8504 .nr(4)
8505 .kr(2)
8506 .sr(1)
8507 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008508 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008509 .k(k)
8510 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08008511 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008512 }
8513 }
8514 }
8515
8516 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_subtile) {
8517 TEST_REQUIRES_X86_SSE41;
8518 for (uint32_t n = 5; n < 8; n++) {
8519 for (size_t k = 1; k <= 40; k += 9) {
8520 for (uint32_t m = 1; m <= 3; m++) {
8521 GemmMicrokernelTester()
8522 .mr(3)
8523 .nr(4)
8524 .kr(2)
8525 .sr(1)
8526 .m(m)
8527 .n(n)
8528 .k(k)
8529 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008530 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008531 }
8532 }
8533 }
8534 }
8535
8536 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4) {
8537 TEST_REQUIRES_X86_SSE41;
8538 for (uint32_t n = 8; n <= 12; n += 4) {
8539 for (size_t k = 1; k <= 40; k += 9) {
8540 GemmMicrokernelTester()
8541 .mr(3)
8542 .nr(4)
8543 .kr(2)
8544 .sr(1)
8545 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008546 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008547 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008548 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008549 }
8550 }
8551 }
8552
8553 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_strided_cn) {
8554 TEST_REQUIRES_X86_SSE41;
8555 for (uint32_t n = 8; n <= 12; n += 4) {
8556 for (size_t k = 1; k <= 40; k += 9) {
8557 GemmMicrokernelTester()
8558 .mr(3)
8559 .nr(4)
8560 .kr(2)
8561 .sr(1)
8562 .m(3)
8563 .n(n)
8564 .k(k)
8565 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08008566 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008567 }
8568 }
8569 }
8570
8571 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_subtile) {
8572 TEST_REQUIRES_X86_SSE41;
8573 for (uint32_t n = 8; n <= 12; n += 4) {
8574 for (size_t k = 1; k <= 40; k += 9) {
8575 for (uint32_t m = 1; m <= 3; m++) {
8576 GemmMicrokernelTester()
8577 .mr(3)
8578 .nr(4)
8579 .kr(2)
8580 .sr(1)
8581 .m(m)
8582 .n(n)
8583 .k(k)
8584 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008585 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008586 }
8587 }
8588 }
8589 }
8590
8591 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, small_kernel) {
8592 TEST_REQUIRES_X86_SSE41;
8593 for (size_t k = 1; k <= 40; k += 9) {
8594 GemmMicrokernelTester()
8595 .mr(3)
8596 .nr(4)
8597 .kr(2)
8598 .sr(1)
8599 .m(3)
8600 .n(4)
8601 .k(k)
8602 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008603 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008604 }
8605 }
8606
8607 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, small_kernel_subtile) {
8608 TEST_REQUIRES_X86_SSE41;
8609 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008610 for (uint32_t n = 1; n <= 4; n++) {
8611 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008612 GemmMicrokernelTester()
8613 .mr(3)
8614 .nr(4)
8615 .kr(2)
8616 .sr(1)
8617 .m(m)
8618 .n(n)
8619 .k(k)
8620 .ks(3)
8621 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008622 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008623 }
8624 }
8625 }
8626 }
8627
8628 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_small_kernel) {
8629 TEST_REQUIRES_X86_SSE41;
8630 for (uint32_t n = 5; n < 8; n++) {
8631 for (size_t k = 1; k <= 40; k += 9) {
8632 GemmMicrokernelTester()
8633 .mr(3)
8634 .nr(4)
8635 .kr(2)
8636 .sr(1)
8637 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008638 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008639 .k(k)
8640 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008641 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008642 }
8643 }
8644 }
8645
8646 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_small_kernel) {
8647 TEST_REQUIRES_X86_SSE41;
8648 for (uint32_t n = 8; n <= 12; n += 4) {
8649 for (size_t k = 1; k <= 40; k += 9) {
8650 GemmMicrokernelTester()
8651 .mr(3)
8652 .nr(4)
8653 .kr(2)
8654 .sr(1)
8655 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008656 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008657 .k(k)
8658 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008659 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008660 }
8661 }
8662 }
8663
8664 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cm_subtile) {
8665 TEST_REQUIRES_X86_SSE41;
8666 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008667 for (uint32_t n = 1; n <= 4; n++) {
8668 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008669 GemmMicrokernelTester()
8670 .mr(3)
8671 .nr(4)
8672 .kr(2)
8673 .sr(1)
8674 .m(m)
8675 .n(n)
8676 .k(k)
8677 .cm_stride(7)
8678 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008679 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008680 }
8681 }
8682 }
8683 }
8684
8685 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, a_offset) {
8686 TEST_REQUIRES_X86_SSE41;
8687 for (size_t k = 1; k <= 40; k += 9) {
8688 GemmMicrokernelTester()
8689 .mr(3)
8690 .nr(4)
8691 .kr(2)
8692 .sr(1)
8693 .m(3)
8694 .n(4)
8695 .k(k)
8696 .ks(3)
8697 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08008698 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008699 }
8700 }
8701
8702 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, zero) {
8703 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008704 for (size_t k = 1; k <= 40; k += 9) {
8705 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008706 GemmMicrokernelTester()
8707 .mr(3)
8708 .nr(4)
8709 .kr(2)
8710 .sr(1)
8711 .m(3)
8712 .n(4)
8713 .k(k)
8714 .ks(3)
8715 .a_offset(127)
8716 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08008717 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008718 }
8719 }
8720 }
8721
8722 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, qmin) {
8723 TEST_REQUIRES_X86_SSE41;
8724 GemmMicrokernelTester()
8725 .mr(3)
8726 .nr(4)
8727 .kr(2)
8728 .sr(1)
8729 .m(3)
8730 .n(4)
8731 .k(8)
8732 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008733 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008734 }
8735
8736 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, qmax) {
8737 TEST_REQUIRES_X86_SSE41;
8738 GemmMicrokernelTester()
8739 .mr(3)
8740 .nr(4)
8741 .kr(2)
8742 .sr(1)
8743 .m(3)
8744 .n(4)
8745 .k(8)
8746 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008747 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008748 }
8749
8750 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cm) {
8751 TEST_REQUIRES_X86_SSE41;
8752 GemmMicrokernelTester()
8753 .mr(3)
8754 .nr(4)
8755 .kr(2)
8756 .sr(1)
8757 .m(3)
8758 .n(4)
8759 .k(8)
8760 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08008761 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008762 }
8763
8764 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, no_a_zero_point) {
8765 TEST_REQUIRES_X86_SSE41;
8766 for (size_t k = 1; k <= 40; k += 9) {
8767 GemmMicrokernelTester()
8768 .mr(3)
8769 .nr(4)
8770 .kr(2)
8771 .sr(1)
8772 .m(3)
8773 .n(4)
8774 .k(k)
8775 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008776 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008777 }
8778 }
8779
8780 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, no_b_zero_point) {
8781 TEST_REQUIRES_X86_SSE41;
8782 for (size_t k = 1; k <= 40; k += 9) {
8783 GemmMicrokernelTester()
8784 .mr(3)
8785 .nr(4)
8786 .kr(2)
8787 .sr(1)
8788 .m(3)
8789 .n(4)
8790 .k(k)
8791 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008792 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008793 }
8794 }
8795
8796 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, no_zero_point) {
8797 TEST_REQUIRES_X86_SSE41;
8798 for (size_t k = 1; k <= 40; k += 9) {
8799 GemmMicrokernelTester()
8800 .mr(3)
8801 .nr(4)
8802 .kr(2)
8803 .sr(1)
8804 .m(3)
8805 .n(4)
8806 .k(k)
8807 .a_zero_point(0)
8808 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008809 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008810 }
8811 }
8812#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8813
8814
8815#if XNN_ARCH_X86 || XNN_ARCH_X86_64
8816 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8) {
8817 TEST_REQUIRES_X86_SSE41;
8818 GemmMicrokernelTester()
8819 .mr(4)
8820 .nr(4)
8821 .kr(2)
8822 .sr(1)
8823 .m(4)
8824 .n(4)
8825 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08008826 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008827 }
8828
8829 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cn) {
8830 TEST_REQUIRES_X86_SSE41;
8831 GemmMicrokernelTester()
8832 .mr(4)
8833 .nr(4)
8834 .kr(2)
8835 .sr(1)
8836 .m(4)
8837 .n(4)
8838 .k(8)
8839 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08008840 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008841 }
8842
8843 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile) {
8844 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008845 for (uint32_t n = 1; n <= 4; n++) {
8846 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008847 GemmMicrokernelTester()
8848 .mr(4)
8849 .nr(4)
8850 .kr(2)
8851 .sr(1)
8852 .m(m)
8853 .n(n)
8854 .k(8)
8855 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008856 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008857 }
8858 }
8859 }
8860
8861 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile_m) {
8862 TEST_REQUIRES_X86_SSE41;
8863 for (uint32_t m = 1; m <= 4; m++) {
8864 GemmMicrokernelTester()
8865 .mr(4)
8866 .nr(4)
8867 .kr(2)
8868 .sr(1)
8869 .m(m)
8870 .n(4)
8871 .k(8)
8872 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008873 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008874 }
8875 }
8876
8877 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile_n) {
8878 TEST_REQUIRES_X86_SSE41;
8879 for (uint32_t n = 1; n <= 4; n++) {
8880 GemmMicrokernelTester()
8881 .mr(4)
8882 .nr(4)
8883 .kr(2)
8884 .sr(1)
8885 .m(4)
8886 .n(n)
8887 .k(8)
8888 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008889 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008890 }
8891 }
8892
8893 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_lt_8) {
8894 TEST_REQUIRES_X86_SSE41;
8895 for (size_t k = 1; k < 8; k++) {
8896 GemmMicrokernelTester()
8897 .mr(4)
8898 .nr(4)
8899 .kr(2)
8900 .sr(1)
8901 .m(4)
8902 .n(4)
8903 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008904 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008905 }
8906 }
8907
8908 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_lt_8_subtile) {
8909 TEST_REQUIRES_X86_SSE41;
8910 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008911 for (uint32_t n = 1; n <= 4; n++) {
8912 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008913 GemmMicrokernelTester()
8914 .mr(4)
8915 .nr(4)
8916 .kr(2)
8917 .sr(1)
8918 .m(m)
8919 .n(n)
8920 .k(k)
8921 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008922 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008923 }
8924 }
8925 }
8926 }
8927
8928 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_gt_8) {
8929 TEST_REQUIRES_X86_SSE41;
8930 for (size_t k = 9; k < 16; k++) {
8931 GemmMicrokernelTester()
8932 .mr(4)
8933 .nr(4)
8934 .kr(2)
8935 .sr(1)
8936 .m(4)
8937 .n(4)
8938 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008939 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008940 }
8941 }
8942
8943 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_gt_8_subtile) {
8944 TEST_REQUIRES_X86_SSE41;
8945 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008946 for (uint32_t n = 1; n <= 4; n++) {
8947 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008948 GemmMicrokernelTester()
8949 .mr(4)
8950 .nr(4)
8951 .kr(2)
8952 .sr(1)
8953 .m(m)
8954 .n(n)
8955 .k(k)
8956 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008957 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008958 }
8959 }
8960 }
8961 }
8962
8963 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_div_8) {
8964 TEST_REQUIRES_X86_SSE41;
8965 for (size_t k = 16; k <= 80; k += 8) {
8966 GemmMicrokernelTester()
8967 .mr(4)
8968 .nr(4)
8969 .kr(2)
8970 .sr(1)
8971 .m(4)
8972 .n(4)
8973 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008974 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008975 }
8976 }
8977
8978 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_div_8_subtile) {
8979 TEST_REQUIRES_X86_SSE41;
8980 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008981 for (uint32_t n = 1; n <= 4; n++) {
8982 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008983 GemmMicrokernelTester()
8984 .mr(4)
8985 .nr(4)
8986 .kr(2)
8987 .sr(1)
8988 .m(m)
8989 .n(n)
8990 .k(k)
8991 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008992 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07008993 }
8994 }
8995 }
8996 }
8997
8998 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4) {
8999 TEST_REQUIRES_X86_SSE41;
9000 for (uint32_t n = 5; n < 8; n++) {
9001 for (size_t k = 1; k <= 40; k += 9) {
9002 GemmMicrokernelTester()
9003 .mr(4)
9004 .nr(4)
9005 .kr(2)
9006 .sr(1)
9007 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009008 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009009 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009010 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009011 }
9012 }
9013 }
9014
9015 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_strided_cn) {
9016 TEST_REQUIRES_X86_SSE41;
9017 for (uint32_t n = 5; n < 8; n++) {
9018 for (size_t k = 1; k <= 40; k += 9) {
9019 GemmMicrokernelTester()
9020 .mr(4)
9021 .nr(4)
9022 .kr(2)
9023 .sr(1)
9024 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009025 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009026 .k(k)
9027 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08009028 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009029 }
9030 }
9031 }
9032
9033 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_subtile) {
9034 TEST_REQUIRES_X86_SSE41;
9035 for (uint32_t n = 5; n < 8; n++) {
9036 for (size_t k = 1; k <= 40; k += 9) {
9037 for (uint32_t m = 1; m <= 4; m++) {
9038 GemmMicrokernelTester()
9039 .mr(4)
9040 .nr(4)
9041 .kr(2)
9042 .sr(1)
9043 .m(m)
9044 .n(n)
9045 .k(k)
9046 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009047 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009048 }
9049 }
9050 }
9051 }
9052
9053 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4) {
9054 TEST_REQUIRES_X86_SSE41;
9055 for (uint32_t n = 8; n <= 12; n += 4) {
9056 for (size_t k = 1; k <= 40; k += 9) {
9057 GemmMicrokernelTester()
9058 .mr(4)
9059 .nr(4)
9060 .kr(2)
9061 .sr(1)
9062 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009063 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009064 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009065 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009066 }
9067 }
9068 }
9069
9070 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_strided_cn) {
9071 TEST_REQUIRES_X86_SSE41;
9072 for (uint32_t n = 8; n <= 12; n += 4) {
9073 for (size_t k = 1; k <= 40; k += 9) {
9074 GemmMicrokernelTester()
9075 .mr(4)
9076 .nr(4)
9077 .kr(2)
9078 .sr(1)
9079 .m(4)
9080 .n(n)
9081 .k(k)
9082 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08009083 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009084 }
9085 }
9086 }
9087
9088 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_subtile) {
9089 TEST_REQUIRES_X86_SSE41;
9090 for (uint32_t n = 8; n <= 12; n += 4) {
9091 for (size_t k = 1; k <= 40; k += 9) {
9092 for (uint32_t m = 1; m <= 4; m++) {
9093 GemmMicrokernelTester()
9094 .mr(4)
9095 .nr(4)
9096 .kr(2)
9097 .sr(1)
9098 .m(m)
9099 .n(n)
9100 .k(k)
9101 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009102 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009103 }
9104 }
9105 }
9106 }
9107
9108 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, small_kernel) {
9109 TEST_REQUIRES_X86_SSE41;
9110 for (size_t k = 1; k <= 40; k += 9) {
9111 GemmMicrokernelTester()
9112 .mr(4)
9113 .nr(4)
9114 .kr(2)
9115 .sr(1)
9116 .m(4)
9117 .n(4)
9118 .k(k)
9119 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009120 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009121 }
9122 }
9123
9124 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, small_kernel_subtile) {
9125 TEST_REQUIRES_X86_SSE41;
9126 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009127 for (uint32_t n = 1; n <= 4; n++) {
9128 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009129 GemmMicrokernelTester()
9130 .mr(4)
9131 .nr(4)
9132 .kr(2)
9133 .sr(1)
9134 .m(m)
9135 .n(n)
9136 .k(k)
9137 .ks(3)
9138 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009139 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009140 }
9141 }
9142 }
9143 }
9144
9145 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_small_kernel) {
9146 TEST_REQUIRES_X86_SSE41;
9147 for (uint32_t n = 5; n < 8; n++) {
9148 for (size_t k = 1; k <= 40; k += 9) {
9149 GemmMicrokernelTester()
9150 .mr(4)
9151 .nr(4)
9152 .kr(2)
9153 .sr(1)
9154 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009155 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009156 .k(k)
9157 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009158 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009159 }
9160 }
9161 }
9162
9163 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_small_kernel) {
9164 TEST_REQUIRES_X86_SSE41;
9165 for (uint32_t n = 8; n <= 12; n += 4) {
9166 for (size_t k = 1; k <= 40; k += 9) {
9167 GemmMicrokernelTester()
9168 .mr(4)
9169 .nr(4)
9170 .kr(2)
9171 .sr(1)
9172 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009173 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009174 .k(k)
9175 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009176 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009177 }
9178 }
9179 }
9180
9181 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cm_subtile) {
9182 TEST_REQUIRES_X86_SSE41;
9183 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009184 for (uint32_t n = 1; n <= 4; n++) {
9185 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009186 GemmMicrokernelTester()
9187 .mr(4)
9188 .nr(4)
9189 .kr(2)
9190 .sr(1)
9191 .m(m)
9192 .n(n)
9193 .k(k)
9194 .cm_stride(7)
9195 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009196 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009197 }
9198 }
9199 }
9200 }
9201
9202 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, a_offset) {
9203 TEST_REQUIRES_X86_SSE41;
9204 for (size_t k = 1; k <= 40; k += 9) {
9205 GemmMicrokernelTester()
9206 .mr(4)
9207 .nr(4)
9208 .kr(2)
9209 .sr(1)
9210 .m(4)
9211 .n(4)
9212 .k(k)
9213 .ks(3)
9214 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08009215 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009216 }
9217 }
9218
9219 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, zero) {
9220 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009221 for (size_t k = 1; k <= 40; k += 9) {
9222 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009223 GemmMicrokernelTester()
9224 .mr(4)
9225 .nr(4)
9226 .kr(2)
9227 .sr(1)
9228 .m(4)
9229 .n(4)
9230 .k(k)
9231 .ks(3)
9232 .a_offset(163)
9233 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08009234 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009235 }
9236 }
9237 }
9238
9239 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, qmin) {
9240 TEST_REQUIRES_X86_SSE41;
9241 GemmMicrokernelTester()
9242 .mr(4)
9243 .nr(4)
9244 .kr(2)
9245 .sr(1)
9246 .m(4)
9247 .n(4)
9248 .k(8)
9249 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009250 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009251 }
9252
9253 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, qmax) {
9254 TEST_REQUIRES_X86_SSE41;
9255 GemmMicrokernelTester()
9256 .mr(4)
9257 .nr(4)
9258 .kr(2)
9259 .sr(1)
9260 .m(4)
9261 .n(4)
9262 .k(8)
9263 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009264 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009265 }
9266
9267 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cm) {
9268 TEST_REQUIRES_X86_SSE41;
9269 GemmMicrokernelTester()
9270 .mr(4)
9271 .nr(4)
9272 .kr(2)
9273 .sr(1)
9274 .m(4)
9275 .n(4)
9276 .k(8)
9277 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08009278 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009279 }
9280
9281 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, no_a_zero_point) {
9282 TEST_REQUIRES_X86_SSE41;
9283 for (size_t k = 1; k <= 40; k += 9) {
9284 GemmMicrokernelTester()
9285 .mr(4)
9286 .nr(4)
9287 .kr(2)
9288 .sr(1)
9289 .m(4)
9290 .n(4)
9291 .k(k)
9292 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009293 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009294 }
9295 }
9296
9297 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, no_b_zero_point) {
9298 TEST_REQUIRES_X86_SSE41;
9299 for (size_t k = 1; k <= 40; k += 9) {
9300 GemmMicrokernelTester()
9301 .mr(4)
9302 .nr(4)
9303 .kr(2)
9304 .sr(1)
9305 .m(4)
9306 .n(4)
9307 .k(k)
9308 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009309 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009310 }
9311 }
9312
9313 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, no_zero_point) {
9314 TEST_REQUIRES_X86_SSE41;
9315 for (size_t k = 1; k <= 40; k += 9) {
9316 GemmMicrokernelTester()
9317 .mr(4)
9318 .nr(4)
9319 .kr(2)
9320 .sr(1)
9321 .m(4)
9322 .n(4)
9323 .k(k)
9324 .a_zero_point(0)
9325 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009326 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009327 }
9328 }
9329#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9330
9331
9332#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009333 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8) {
9334 TEST_REQUIRES_X86_AVX;
9335 GemmMicrokernelTester()
9336 .mr(3)
9337 .nr(4)
9338 .kr(2)
9339 .sr(1)
9340 .m(3)
9341 .n(4)
9342 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08009343 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009344 }
9345
9346 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cn) {
9347 TEST_REQUIRES_X86_AVX;
9348 GemmMicrokernelTester()
9349 .mr(3)
9350 .nr(4)
9351 .kr(2)
9352 .sr(1)
9353 .m(3)
9354 .n(4)
9355 .k(8)
9356 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08009357 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009358 }
9359
9360 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile) {
9361 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009362 for (uint32_t n = 1; n <= 4; n++) {
9363 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009364 GemmMicrokernelTester()
9365 .mr(3)
9366 .nr(4)
9367 .kr(2)
9368 .sr(1)
9369 .m(m)
9370 .n(n)
9371 .k(8)
9372 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009373 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009374 }
9375 }
9376 }
9377
9378 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile_m) {
9379 TEST_REQUIRES_X86_AVX;
9380 for (uint32_t m = 1; m <= 3; m++) {
9381 GemmMicrokernelTester()
9382 .mr(3)
9383 .nr(4)
9384 .kr(2)
9385 .sr(1)
9386 .m(m)
9387 .n(4)
9388 .k(8)
9389 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009390 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009391 }
9392 }
9393
9394 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile_n) {
9395 TEST_REQUIRES_X86_AVX;
9396 for (uint32_t n = 1; n <= 4; n++) {
9397 GemmMicrokernelTester()
9398 .mr(3)
9399 .nr(4)
9400 .kr(2)
9401 .sr(1)
9402 .m(3)
9403 .n(n)
9404 .k(8)
9405 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009406 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009407 }
9408 }
9409
9410 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_lt_8) {
9411 TEST_REQUIRES_X86_AVX;
9412 for (size_t k = 1; k < 8; k++) {
9413 GemmMicrokernelTester()
9414 .mr(3)
9415 .nr(4)
9416 .kr(2)
9417 .sr(1)
9418 .m(3)
9419 .n(4)
9420 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009421 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009422 }
9423 }
9424
9425 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_lt_8_subtile) {
9426 TEST_REQUIRES_X86_AVX;
9427 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009428 for (uint32_t n = 1; n <= 4; n++) {
9429 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009430 GemmMicrokernelTester()
9431 .mr(3)
9432 .nr(4)
9433 .kr(2)
9434 .sr(1)
9435 .m(m)
9436 .n(n)
9437 .k(k)
9438 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009439 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009440 }
9441 }
9442 }
9443 }
9444
9445 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_gt_8) {
9446 TEST_REQUIRES_X86_AVX;
9447 for (size_t k = 9; k < 16; k++) {
9448 GemmMicrokernelTester()
9449 .mr(3)
9450 .nr(4)
9451 .kr(2)
9452 .sr(1)
9453 .m(3)
9454 .n(4)
9455 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009456 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009457 }
9458 }
9459
9460 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_gt_8_subtile) {
9461 TEST_REQUIRES_X86_AVX;
9462 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009463 for (uint32_t n = 1; n <= 4; n++) {
9464 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009465 GemmMicrokernelTester()
9466 .mr(3)
9467 .nr(4)
9468 .kr(2)
9469 .sr(1)
9470 .m(m)
9471 .n(n)
9472 .k(k)
9473 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009474 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009475 }
9476 }
9477 }
9478 }
9479
9480 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_div_8) {
9481 TEST_REQUIRES_X86_AVX;
9482 for (size_t k = 16; k <= 80; k += 8) {
9483 GemmMicrokernelTester()
9484 .mr(3)
9485 .nr(4)
9486 .kr(2)
9487 .sr(1)
9488 .m(3)
9489 .n(4)
9490 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009491 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009492 }
9493 }
9494
9495 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_div_8_subtile) {
9496 TEST_REQUIRES_X86_AVX;
9497 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009498 for (uint32_t n = 1; n <= 4; n++) {
9499 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009500 GemmMicrokernelTester()
9501 .mr(3)
9502 .nr(4)
9503 .kr(2)
9504 .sr(1)
9505 .m(m)
9506 .n(n)
9507 .k(k)
9508 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009509 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009510 }
9511 }
9512 }
9513 }
9514
9515 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4) {
9516 TEST_REQUIRES_X86_AVX;
9517 for (uint32_t n = 5; n < 8; n++) {
9518 for (size_t k = 1; k <= 40; k += 9) {
9519 GemmMicrokernelTester()
9520 .mr(3)
9521 .nr(4)
9522 .kr(2)
9523 .sr(1)
9524 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009525 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009526 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009527 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009528 }
9529 }
9530 }
9531
9532 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_strided_cn) {
9533 TEST_REQUIRES_X86_AVX;
9534 for (uint32_t n = 5; n < 8; n++) {
9535 for (size_t k = 1; k <= 40; k += 9) {
9536 GemmMicrokernelTester()
9537 .mr(3)
9538 .nr(4)
9539 .kr(2)
9540 .sr(1)
9541 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009542 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009543 .k(k)
9544 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08009545 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009546 }
9547 }
9548 }
9549
9550 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_subtile) {
9551 TEST_REQUIRES_X86_AVX;
9552 for (uint32_t n = 5; n < 8; n++) {
9553 for (size_t k = 1; k <= 40; k += 9) {
9554 for (uint32_t m = 1; m <= 3; m++) {
9555 GemmMicrokernelTester()
9556 .mr(3)
9557 .nr(4)
9558 .kr(2)
9559 .sr(1)
9560 .m(m)
9561 .n(n)
9562 .k(k)
9563 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009564 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009565 }
9566 }
9567 }
9568 }
9569
9570 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4) {
9571 TEST_REQUIRES_X86_AVX;
9572 for (uint32_t n = 8; n <= 12; n += 4) {
9573 for (size_t k = 1; k <= 40; k += 9) {
9574 GemmMicrokernelTester()
9575 .mr(3)
9576 .nr(4)
9577 .kr(2)
9578 .sr(1)
9579 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009580 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009581 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009582 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009583 }
9584 }
9585 }
9586
9587 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_strided_cn) {
9588 TEST_REQUIRES_X86_AVX;
9589 for (uint32_t n = 8; n <= 12; n += 4) {
9590 for (size_t k = 1; k <= 40; k += 9) {
9591 GemmMicrokernelTester()
9592 .mr(3)
9593 .nr(4)
9594 .kr(2)
9595 .sr(1)
9596 .m(3)
9597 .n(n)
9598 .k(k)
9599 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08009600 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009601 }
9602 }
9603 }
9604
9605 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_subtile) {
9606 TEST_REQUIRES_X86_AVX;
9607 for (uint32_t n = 8; n <= 12; n += 4) {
9608 for (size_t k = 1; k <= 40; k += 9) {
9609 for (uint32_t m = 1; m <= 3; m++) {
9610 GemmMicrokernelTester()
9611 .mr(3)
9612 .nr(4)
9613 .kr(2)
9614 .sr(1)
9615 .m(m)
9616 .n(n)
9617 .k(k)
9618 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009619 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009620 }
9621 }
9622 }
9623 }
9624
9625 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, small_kernel) {
9626 TEST_REQUIRES_X86_AVX;
9627 for (size_t k = 1; k <= 40; k += 9) {
9628 GemmMicrokernelTester()
9629 .mr(3)
9630 .nr(4)
9631 .kr(2)
9632 .sr(1)
9633 .m(3)
9634 .n(4)
9635 .k(k)
9636 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009637 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009638 }
9639 }
9640
9641 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, small_kernel_subtile) {
9642 TEST_REQUIRES_X86_AVX;
9643 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009644 for (uint32_t n = 1; n <= 4; n++) {
9645 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009646 GemmMicrokernelTester()
9647 .mr(3)
9648 .nr(4)
9649 .kr(2)
9650 .sr(1)
9651 .m(m)
9652 .n(n)
9653 .k(k)
9654 .ks(3)
9655 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009656 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009657 }
9658 }
9659 }
9660 }
9661
9662 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_small_kernel) {
9663 TEST_REQUIRES_X86_AVX;
9664 for (uint32_t n = 5; n < 8; n++) {
9665 for (size_t k = 1; k <= 40; k += 9) {
9666 GemmMicrokernelTester()
9667 .mr(3)
9668 .nr(4)
9669 .kr(2)
9670 .sr(1)
9671 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009672 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009673 .k(k)
9674 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009675 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009676 }
9677 }
9678 }
9679
9680 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_small_kernel) {
9681 TEST_REQUIRES_X86_AVX;
9682 for (uint32_t n = 8; n <= 12; n += 4) {
9683 for (size_t k = 1; k <= 40; k += 9) {
9684 GemmMicrokernelTester()
9685 .mr(3)
9686 .nr(4)
9687 .kr(2)
9688 .sr(1)
9689 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009690 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009691 .k(k)
9692 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009693 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009694 }
9695 }
9696 }
9697
9698 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cm_subtile) {
9699 TEST_REQUIRES_X86_AVX;
9700 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009701 for (uint32_t n = 1; n <= 4; n++) {
9702 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009703 GemmMicrokernelTester()
9704 .mr(3)
9705 .nr(4)
9706 .kr(2)
9707 .sr(1)
9708 .m(m)
9709 .n(n)
9710 .k(k)
9711 .cm_stride(7)
9712 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009713 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009714 }
9715 }
9716 }
9717 }
9718
9719 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, a_offset) {
9720 TEST_REQUIRES_X86_AVX;
9721 for (size_t k = 1; k <= 40; k += 9) {
9722 GemmMicrokernelTester()
9723 .mr(3)
9724 .nr(4)
9725 .kr(2)
9726 .sr(1)
9727 .m(3)
9728 .n(4)
9729 .k(k)
9730 .ks(3)
9731 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08009732 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009733 }
9734 }
9735
9736 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, zero) {
9737 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009738 for (size_t k = 1; k <= 40; k += 9) {
9739 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009740 GemmMicrokernelTester()
9741 .mr(3)
9742 .nr(4)
9743 .kr(2)
9744 .sr(1)
9745 .m(3)
9746 .n(4)
9747 .k(k)
9748 .ks(3)
9749 .a_offset(127)
9750 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08009751 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009752 }
9753 }
9754 }
9755
9756 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, qmin) {
9757 TEST_REQUIRES_X86_AVX;
9758 GemmMicrokernelTester()
9759 .mr(3)
9760 .nr(4)
9761 .kr(2)
9762 .sr(1)
9763 .m(3)
9764 .n(4)
9765 .k(8)
9766 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009767 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009768 }
9769
9770 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, qmax) {
9771 TEST_REQUIRES_X86_AVX;
9772 GemmMicrokernelTester()
9773 .mr(3)
9774 .nr(4)
9775 .kr(2)
9776 .sr(1)
9777 .m(3)
9778 .n(4)
9779 .k(8)
9780 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009781 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009782 }
9783
9784 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cm) {
9785 TEST_REQUIRES_X86_AVX;
9786 GemmMicrokernelTester()
9787 .mr(3)
9788 .nr(4)
9789 .kr(2)
9790 .sr(1)
9791 .m(3)
9792 .n(4)
9793 .k(8)
9794 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08009795 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009796 }
9797
9798 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, no_a_zero_point) {
9799 TEST_REQUIRES_X86_AVX;
9800 for (size_t k = 1; k <= 40; k += 9) {
9801 GemmMicrokernelTester()
9802 .mr(3)
9803 .nr(4)
9804 .kr(2)
9805 .sr(1)
9806 .m(3)
9807 .n(4)
9808 .k(k)
9809 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009810 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009811 }
9812 }
9813
9814 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, no_b_zero_point) {
9815 TEST_REQUIRES_X86_AVX;
9816 for (size_t k = 1; k <= 40; k += 9) {
9817 GemmMicrokernelTester()
9818 .mr(3)
9819 .nr(4)
9820 .kr(2)
9821 .sr(1)
9822 .m(3)
9823 .n(4)
9824 .k(k)
9825 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009826 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009827 }
9828 }
9829
9830 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, no_zero_point) {
9831 TEST_REQUIRES_X86_AVX;
9832 for (size_t k = 1; k <= 40; k += 9) {
9833 GemmMicrokernelTester()
9834 .mr(3)
9835 .nr(4)
9836 .kr(2)
9837 .sr(1)
9838 .m(3)
9839 .n(4)
9840 .k(k)
9841 .a_zero_point(0)
9842 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009843 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009844 }
9845 }
9846#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9847
9848
9849#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009850 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8) {
9851 TEST_REQUIRES_X86_XOP;
9852 GemmMicrokernelTester()
9853 .mr(1)
9854 .nr(4)
9855 .kr(2)
9856 .sr(1)
9857 .m(1)
9858 .n(4)
9859 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08009860 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009861 }
9862
9863 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cn) {
9864 TEST_REQUIRES_X86_XOP;
9865 GemmMicrokernelTester()
9866 .mr(1)
9867 .nr(4)
9868 .kr(2)
9869 .sr(1)
9870 .m(1)
9871 .n(4)
9872 .k(8)
9873 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -08009874 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009875 }
9876
9877 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile) {
9878 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009879 for (uint32_t n = 1; n <= 4; n++) {
9880 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009881 GemmMicrokernelTester()
9882 .mr(1)
9883 .nr(4)
9884 .kr(2)
9885 .sr(1)
9886 .m(m)
9887 .n(n)
9888 .k(8)
9889 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009890 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009891 }
9892 }
9893 }
9894
9895 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile_m) {
9896 TEST_REQUIRES_X86_XOP;
9897 for (uint32_t m = 1; m <= 1; m++) {
9898 GemmMicrokernelTester()
9899 .mr(1)
9900 .nr(4)
9901 .kr(2)
9902 .sr(1)
9903 .m(m)
9904 .n(4)
9905 .k(8)
9906 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009907 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009908 }
9909 }
9910
9911 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile_n) {
9912 TEST_REQUIRES_X86_XOP;
9913 for (uint32_t n = 1; n <= 4; n++) {
9914 GemmMicrokernelTester()
9915 .mr(1)
9916 .nr(4)
9917 .kr(2)
9918 .sr(1)
9919 .m(1)
9920 .n(n)
9921 .k(8)
9922 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009923 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009924 }
9925 }
9926
9927 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_lt_8) {
9928 TEST_REQUIRES_X86_XOP;
9929 for (size_t k = 1; k < 8; k++) {
9930 GemmMicrokernelTester()
9931 .mr(1)
9932 .nr(4)
9933 .kr(2)
9934 .sr(1)
9935 .m(1)
9936 .n(4)
9937 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009938 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009939 }
9940 }
9941
9942 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_lt_8_subtile) {
9943 TEST_REQUIRES_X86_XOP;
9944 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009945 for (uint32_t n = 1; n <= 4; n++) {
9946 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009947 GemmMicrokernelTester()
9948 .mr(1)
9949 .nr(4)
9950 .kr(2)
9951 .sr(1)
9952 .m(m)
9953 .n(n)
9954 .k(k)
9955 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009956 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009957 }
9958 }
9959 }
9960 }
9961
9962 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_gt_8) {
9963 TEST_REQUIRES_X86_XOP;
9964 for (size_t k = 9; k < 16; k++) {
9965 GemmMicrokernelTester()
9966 .mr(1)
9967 .nr(4)
9968 .kr(2)
9969 .sr(1)
9970 .m(1)
9971 .n(4)
9972 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009973 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009974 }
9975 }
9976
9977 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_gt_8_subtile) {
9978 TEST_REQUIRES_X86_XOP;
9979 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009980 for (uint32_t n = 1; n <= 4; n++) {
9981 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009982 GemmMicrokernelTester()
9983 .mr(1)
9984 .nr(4)
9985 .kr(2)
9986 .sr(1)
9987 .m(m)
9988 .n(n)
9989 .k(k)
9990 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009991 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -07009992 }
9993 }
9994 }
9995 }
9996
9997 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_div_8) {
9998 TEST_REQUIRES_X86_XOP;
9999 for (size_t k = 16; k <= 80; k += 8) {
10000 GemmMicrokernelTester()
10001 .mr(1)
10002 .nr(4)
10003 .kr(2)
10004 .sr(1)
10005 .m(1)
10006 .n(4)
10007 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010008 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010009 }
10010 }
10011
10012 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_div_8_subtile) {
10013 TEST_REQUIRES_X86_XOP;
10014 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010015 for (uint32_t n = 1; n <= 4; n++) {
10016 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010017 GemmMicrokernelTester()
10018 .mr(1)
10019 .nr(4)
10020 .kr(2)
10021 .sr(1)
10022 .m(m)
10023 .n(n)
10024 .k(k)
10025 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010026 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010027 }
10028 }
10029 }
10030 }
10031
10032 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4) {
10033 TEST_REQUIRES_X86_XOP;
10034 for (uint32_t n = 5; n < 8; n++) {
10035 for (size_t k = 1; k <= 40; k += 9) {
10036 GemmMicrokernelTester()
10037 .mr(1)
10038 .nr(4)
10039 .kr(2)
10040 .sr(1)
10041 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010042 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010043 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010044 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010045 }
10046 }
10047 }
10048
10049 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_strided_cn) {
10050 TEST_REQUIRES_X86_XOP;
10051 for (uint32_t n = 5; n < 8; n++) {
10052 for (size_t k = 1; k <= 40; k += 9) {
10053 GemmMicrokernelTester()
10054 .mr(1)
10055 .nr(4)
10056 .kr(2)
10057 .sr(1)
10058 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010059 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010060 .k(k)
10061 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080010062 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010063 }
10064 }
10065 }
10066
10067 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_subtile) {
10068 TEST_REQUIRES_X86_XOP;
10069 for (uint32_t n = 5; n < 8; n++) {
10070 for (size_t k = 1; k <= 40; k += 9) {
10071 for (uint32_t m = 1; m <= 1; m++) {
10072 GemmMicrokernelTester()
10073 .mr(1)
10074 .nr(4)
10075 .kr(2)
10076 .sr(1)
10077 .m(m)
10078 .n(n)
10079 .k(k)
10080 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010081 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010082 }
10083 }
10084 }
10085 }
10086
10087 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4) {
10088 TEST_REQUIRES_X86_XOP;
10089 for (uint32_t n = 8; n <= 12; n += 4) {
10090 for (size_t k = 1; k <= 40; k += 9) {
10091 GemmMicrokernelTester()
10092 .mr(1)
10093 .nr(4)
10094 .kr(2)
10095 .sr(1)
10096 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010097 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010098 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010099 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010100 }
10101 }
10102 }
10103
10104 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_strided_cn) {
10105 TEST_REQUIRES_X86_XOP;
10106 for (uint32_t n = 8; n <= 12; n += 4) {
10107 for (size_t k = 1; k <= 40; k += 9) {
10108 GemmMicrokernelTester()
10109 .mr(1)
10110 .nr(4)
10111 .kr(2)
10112 .sr(1)
10113 .m(1)
10114 .n(n)
10115 .k(k)
10116 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080010117 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010118 }
10119 }
10120 }
10121
10122 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_subtile) {
10123 TEST_REQUIRES_X86_XOP;
10124 for (uint32_t n = 8; n <= 12; n += 4) {
10125 for (size_t k = 1; k <= 40; k += 9) {
10126 for (uint32_t m = 1; m <= 1; m++) {
10127 GemmMicrokernelTester()
10128 .mr(1)
10129 .nr(4)
10130 .kr(2)
10131 .sr(1)
10132 .m(m)
10133 .n(n)
10134 .k(k)
10135 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010136 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010137 }
10138 }
10139 }
10140 }
10141
10142 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, small_kernel) {
10143 TEST_REQUIRES_X86_XOP;
10144 for (size_t k = 1; k <= 40; k += 9) {
10145 GemmMicrokernelTester()
10146 .mr(1)
10147 .nr(4)
10148 .kr(2)
10149 .sr(1)
10150 .m(1)
10151 .n(4)
10152 .k(k)
10153 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010154 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010155 }
10156 }
10157
10158 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, small_kernel_subtile) {
10159 TEST_REQUIRES_X86_XOP;
10160 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010161 for (uint32_t n = 1; n <= 4; n++) {
10162 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010163 GemmMicrokernelTester()
10164 .mr(1)
10165 .nr(4)
10166 .kr(2)
10167 .sr(1)
10168 .m(m)
10169 .n(n)
10170 .k(k)
10171 .ks(3)
10172 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010173 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010174 }
10175 }
10176 }
10177 }
10178
10179 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_small_kernel) {
10180 TEST_REQUIRES_X86_XOP;
10181 for (uint32_t n = 5; n < 8; n++) {
10182 for (size_t k = 1; k <= 40; k += 9) {
10183 GemmMicrokernelTester()
10184 .mr(1)
10185 .nr(4)
10186 .kr(2)
10187 .sr(1)
10188 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010189 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010190 .k(k)
10191 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010192 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010193 }
10194 }
10195 }
10196
10197 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_small_kernel) {
10198 TEST_REQUIRES_X86_XOP;
10199 for (uint32_t n = 8; n <= 12; n += 4) {
10200 for (size_t k = 1; k <= 40; k += 9) {
10201 GemmMicrokernelTester()
10202 .mr(1)
10203 .nr(4)
10204 .kr(2)
10205 .sr(1)
10206 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010207 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010208 .k(k)
10209 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010210 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010211 }
10212 }
10213 }
10214
10215 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cm_subtile) {
10216 TEST_REQUIRES_X86_XOP;
10217 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010218 for (uint32_t n = 1; n <= 4; n++) {
10219 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010220 GemmMicrokernelTester()
10221 .mr(1)
10222 .nr(4)
10223 .kr(2)
10224 .sr(1)
10225 .m(m)
10226 .n(n)
10227 .k(k)
10228 .cm_stride(7)
10229 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010230 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010231 }
10232 }
10233 }
10234 }
10235
10236 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, a_offset) {
10237 TEST_REQUIRES_X86_XOP;
10238 for (size_t k = 1; k <= 40; k += 9) {
10239 GemmMicrokernelTester()
10240 .mr(1)
10241 .nr(4)
10242 .kr(2)
10243 .sr(1)
10244 .m(1)
10245 .n(4)
10246 .k(k)
10247 .ks(3)
10248 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080010249 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010250 }
10251 }
10252
10253 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, zero) {
10254 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010255 for (size_t k = 1; k <= 40; k += 9) {
10256 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010257 GemmMicrokernelTester()
10258 .mr(1)
10259 .nr(4)
10260 .kr(2)
10261 .sr(1)
10262 .m(1)
10263 .n(4)
10264 .k(k)
10265 .ks(3)
10266 .a_offset(43)
10267 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080010268 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010269 }
10270 }
10271 }
10272
10273 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, qmin) {
10274 TEST_REQUIRES_X86_XOP;
10275 GemmMicrokernelTester()
10276 .mr(1)
10277 .nr(4)
10278 .kr(2)
10279 .sr(1)
10280 .m(1)
10281 .n(4)
10282 .k(8)
10283 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010284 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010285 }
10286
10287 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, qmax) {
10288 TEST_REQUIRES_X86_XOP;
10289 GemmMicrokernelTester()
10290 .mr(1)
10291 .nr(4)
10292 .kr(2)
10293 .sr(1)
10294 .m(1)
10295 .n(4)
10296 .k(8)
10297 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010298 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010299 }
10300
10301 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cm) {
10302 TEST_REQUIRES_X86_XOP;
10303 GemmMicrokernelTester()
10304 .mr(1)
10305 .nr(4)
10306 .kr(2)
10307 .sr(1)
10308 .m(1)
10309 .n(4)
10310 .k(8)
10311 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080010312 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010313 }
10314
10315 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, no_a_zero_point) {
10316 TEST_REQUIRES_X86_XOP;
10317 for (size_t k = 1; k <= 40; k += 9) {
10318 GemmMicrokernelTester()
10319 .mr(1)
10320 .nr(4)
10321 .kr(2)
10322 .sr(1)
10323 .m(1)
10324 .n(4)
10325 .k(k)
10326 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080010327 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010328 }
10329 }
10330
10331 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, no_b_zero_point) {
10332 TEST_REQUIRES_X86_XOP;
10333 for (size_t k = 1; k <= 40; k += 9) {
10334 GemmMicrokernelTester()
10335 .mr(1)
10336 .nr(4)
10337 .kr(2)
10338 .sr(1)
10339 .m(1)
10340 .n(4)
10341 .k(k)
10342 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080010343 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010344 }
10345 }
10346
10347 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, no_zero_point) {
10348 TEST_REQUIRES_X86_XOP;
10349 for (size_t k = 1; k <= 40; k += 9) {
10350 GemmMicrokernelTester()
10351 .mr(1)
10352 .nr(4)
10353 .kr(2)
10354 .sr(1)
10355 .m(1)
10356 .n(4)
10357 .k(k)
10358 .a_zero_point(0)
10359 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080010360 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010361 }
10362 }
10363#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10364
10365
10366#if XNN_ARCH_X86 || XNN_ARCH_X86_64
10367 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8) {
10368 TEST_REQUIRES_X86_XOP;
10369 GemmMicrokernelTester()
10370 .mr(2)
10371 .nr(4)
10372 .kr(2)
10373 .sr(1)
10374 .m(2)
10375 .n(4)
10376 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080010377 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010378 }
10379
10380 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cn) {
10381 TEST_REQUIRES_X86_XOP;
10382 GemmMicrokernelTester()
10383 .mr(2)
10384 .nr(4)
10385 .kr(2)
10386 .sr(1)
10387 .m(2)
10388 .n(4)
10389 .k(8)
10390 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080010391 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010392 }
10393
10394 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile) {
10395 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010396 for (uint32_t n = 1; n <= 4; n++) {
10397 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010398 GemmMicrokernelTester()
10399 .mr(2)
10400 .nr(4)
10401 .kr(2)
10402 .sr(1)
10403 .m(m)
10404 .n(n)
10405 .k(8)
10406 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010407 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010408 }
10409 }
10410 }
10411
10412 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile_m) {
10413 TEST_REQUIRES_X86_XOP;
10414 for (uint32_t m = 1; m <= 2; m++) {
10415 GemmMicrokernelTester()
10416 .mr(2)
10417 .nr(4)
10418 .kr(2)
10419 .sr(1)
10420 .m(m)
10421 .n(4)
10422 .k(8)
10423 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010424 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010425 }
10426 }
10427
10428 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile_n) {
10429 TEST_REQUIRES_X86_XOP;
10430 for (uint32_t n = 1; n <= 4; n++) {
10431 GemmMicrokernelTester()
10432 .mr(2)
10433 .nr(4)
10434 .kr(2)
10435 .sr(1)
10436 .m(2)
10437 .n(n)
10438 .k(8)
10439 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010440 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010441 }
10442 }
10443
10444 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_lt_8) {
10445 TEST_REQUIRES_X86_XOP;
10446 for (size_t k = 1; k < 8; k++) {
10447 GemmMicrokernelTester()
10448 .mr(2)
10449 .nr(4)
10450 .kr(2)
10451 .sr(1)
10452 .m(2)
10453 .n(4)
10454 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010455 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010456 }
10457 }
10458
10459 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_lt_8_subtile) {
10460 TEST_REQUIRES_X86_XOP;
10461 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010462 for (uint32_t n = 1; n <= 4; n++) {
10463 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010464 GemmMicrokernelTester()
10465 .mr(2)
10466 .nr(4)
10467 .kr(2)
10468 .sr(1)
10469 .m(m)
10470 .n(n)
10471 .k(k)
10472 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010473 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010474 }
10475 }
10476 }
10477 }
10478
10479 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_gt_8) {
10480 TEST_REQUIRES_X86_XOP;
10481 for (size_t k = 9; k < 16; k++) {
10482 GemmMicrokernelTester()
10483 .mr(2)
10484 .nr(4)
10485 .kr(2)
10486 .sr(1)
10487 .m(2)
10488 .n(4)
10489 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010490 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010491 }
10492 }
10493
10494 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_gt_8_subtile) {
10495 TEST_REQUIRES_X86_XOP;
10496 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010497 for (uint32_t n = 1; n <= 4; n++) {
10498 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010499 GemmMicrokernelTester()
10500 .mr(2)
10501 .nr(4)
10502 .kr(2)
10503 .sr(1)
10504 .m(m)
10505 .n(n)
10506 .k(k)
10507 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010508 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010509 }
10510 }
10511 }
10512 }
10513
10514 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_div_8) {
10515 TEST_REQUIRES_X86_XOP;
10516 for (size_t k = 16; k <= 80; k += 8) {
10517 GemmMicrokernelTester()
10518 .mr(2)
10519 .nr(4)
10520 .kr(2)
10521 .sr(1)
10522 .m(2)
10523 .n(4)
10524 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010525 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010526 }
10527 }
10528
10529 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_div_8_subtile) {
10530 TEST_REQUIRES_X86_XOP;
10531 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010532 for (uint32_t n = 1; n <= 4; n++) {
10533 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010534 GemmMicrokernelTester()
10535 .mr(2)
10536 .nr(4)
10537 .kr(2)
10538 .sr(1)
10539 .m(m)
10540 .n(n)
10541 .k(k)
10542 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010543 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010544 }
10545 }
10546 }
10547 }
10548
10549 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4) {
10550 TEST_REQUIRES_X86_XOP;
10551 for (uint32_t n = 5; n < 8; n++) {
10552 for (size_t k = 1; k <= 40; k += 9) {
10553 GemmMicrokernelTester()
10554 .mr(2)
10555 .nr(4)
10556 .kr(2)
10557 .sr(1)
10558 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010559 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010560 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010561 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010562 }
10563 }
10564 }
10565
10566 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_strided_cn) {
10567 TEST_REQUIRES_X86_XOP;
10568 for (uint32_t n = 5; n < 8; n++) {
10569 for (size_t k = 1; k <= 40; k += 9) {
10570 GemmMicrokernelTester()
10571 .mr(2)
10572 .nr(4)
10573 .kr(2)
10574 .sr(1)
10575 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010576 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010577 .k(k)
10578 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080010579 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010580 }
10581 }
10582 }
10583
10584 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_subtile) {
10585 TEST_REQUIRES_X86_XOP;
10586 for (uint32_t n = 5; n < 8; n++) {
10587 for (size_t k = 1; k <= 40; k += 9) {
10588 for (uint32_t m = 1; m <= 2; m++) {
10589 GemmMicrokernelTester()
10590 .mr(2)
10591 .nr(4)
10592 .kr(2)
10593 .sr(1)
10594 .m(m)
10595 .n(n)
10596 .k(k)
10597 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010598 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010599 }
10600 }
10601 }
10602 }
10603
10604 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4) {
10605 TEST_REQUIRES_X86_XOP;
10606 for (uint32_t n = 8; n <= 12; n += 4) {
10607 for (size_t k = 1; k <= 40; k += 9) {
10608 GemmMicrokernelTester()
10609 .mr(2)
10610 .nr(4)
10611 .kr(2)
10612 .sr(1)
10613 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010614 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010615 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010616 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010617 }
10618 }
10619 }
10620
10621 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_strided_cn) {
10622 TEST_REQUIRES_X86_XOP;
10623 for (uint32_t n = 8; n <= 12; n += 4) {
10624 for (size_t k = 1; k <= 40; k += 9) {
10625 GemmMicrokernelTester()
10626 .mr(2)
10627 .nr(4)
10628 .kr(2)
10629 .sr(1)
10630 .m(2)
10631 .n(n)
10632 .k(k)
10633 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080010634 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010635 }
10636 }
10637 }
10638
10639 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_subtile) {
10640 TEST_REQUIRES_X86_XOP;
10641 for (uint32_t n = 8; n <= 12; n += 4) {
10642 for (size_t k = 1; k <= 40; k += 9) {
10643 for (uint32_t m = 1; m <= 2; m++) {
10644 GemmMicrokernelTester()
10645 .mr(2)
10646 .nr(4)
10647 .kr(2)
10648 .sr(1)
10649 .m(m)
10650 .n(n)
10651 .k(k)
10652 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010653 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010654 }
10655 }
10656 }
10657 }
10658
10659 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, small_kernel) {
10660 TEST_REQUIRES_X86_XOP;
10661 for (size_t k = 1; k <= 40; k += 9) {
10662 GemmMicrokernelTester()
10663 .mr(2)
10664 .nr(4)
10665 .kr(2)
10666 .sr(1)
10667 .m(2)
10668 .n(4)
10669 .k(k)
10670 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010671 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010672 }
10673 }
10674
10675 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, small_kernel_subtile) {
10676 TEST_REQUIRES_X86_XOP;
10677 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010678 for (uint32_t n = 1; n <= 4; n++) {
10679 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010680 GemmMicrokernelTester()
10681 .mr(2)
10682 .nr(4)
10683 .kr(2)
10684 .sr(1)
10685 .m(m)
10686 .n(n)
10687 .k(k)
10688 .ks(3)
10689 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010690 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010691 }
10692 }
10693 }
10694 }
10695
10696 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_small_kernel) {
10697 TEST_REQUIRES_X86_XOP;
10698 for (uint32_t n = 5; n < 8; n++) {
10699 for (size_t k = 1; k <= 40; k += 9) {
10700 GemmMicrokernelTester()
10701 .mr(2)
10702 .nr(4)
10703 .kr(2)
10704 .sr(1)
10705 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010706 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010707 .k(k)
10708 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010709 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010710 }
10711 }
10712 }
10713
10714 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_small_kernel) {
10715 TEST_REQUIRES_X86_XOP;
10716 for (uint32_t n = 8; n <= 12; n += 4) {
10717 for (size_t k = 1; k <= 40; k += 9) {
10718 GemmMicrokernelTester()
10719 .mr(2)
10720 .nr(4)
10721 .kr(2)
10722 .sr(1)
10723 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010724 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010725 .k(k)
10726 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010727 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010728 }
10729 }
10730 }
10731
10732 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cm_subtile) {
10733 TEST_REQUIRES_X86_XOP;
10734 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010735 for (uint32_t n = 1; n <= 4; n++) {
10736 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010737 GemmMicrokernelTester()
10738 .mr(2)
10739 .nr(4)
10740 .kr(2)
10741 .sr(1)
10742 .m(m)
10743 .n(n)
10744 .k(k)
10745 .cm_stride(7)
10746 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010747 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010748 }
10749 }
10750 }
10751 }
10752
10753 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, a_offset) {
10754 TEST_REQUIRES_X86_XOP;
10755 for (size_t k = 1; k <= 40; k += 9) {
10756 GemmMicrokernelTester()
10757 .mr(2)
10758 .nr(4)
10759 .kr(2)
10760 .sr(1)
10761 .m(2)
10762 .n(4)
10763 .k(k)
10764 .ks(3)
10765 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080010766 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010767 }
10768 }
10769
10770 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, zero) {
10771 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010772 for (size_t k = 1; k <= 40; k += 9) {
10773 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010774 GemmMicrokernelTester()
10775 .mr(2)
10776 .nr(4)
10777 .kr(2)
10778 .sr(1)
10779 .m(2)
10780 .n(4)
10781 .k(k)
10782 .ks(3)
10783 .a_offset(83)
10784 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080010785 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010786 }
10787 }
10788 }
10789
10790 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, qmin) {
10791 TEST_REQUIRES_X86_XOP;
10792 GemmMicrokernelTester()
10793 .mr(2)
10794 .nr(4)
10795 .kr(2)
10796 .sr(1)
10797 .m(2)
10798 .n(4)
10799 .k(8)
10800 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010801 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010802 }
10803
10804 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, qmax) {
10805 TEST_REQUIRES_X86_XOP;
10806 GemmMicrokernelTester()
10807 .mr(2)
10808 .nr(4)
10809 .kr(2)
10810 .sr(1)
10811 .m(2)
10812 .n(4)
10813 .k(8)
10814 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010815 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010816 }
10817
10818 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cm) {
10819 TEST_REQUIRES_X86_XOP;
10820 GemmMicrokernelTester()
10821 .mr(2)
10822 .nr(4)
10823 .kr(2)
10824 .sr(1)
10825 .m(2)
10826 .n(4)
10827 .k(8)
10828 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080010829 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010830 }
10831
10832 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, no_a_zero_point) {
10833 TEST_REQUIRES_X86_XOP;
10834 for (size_t k = 1; k <= 40; k += 9) {
10835 GemmMicrokernelTester()
10836 .mr(2)
10837 .nr(4)
10838 .kr(2)
10839 .sr(1)
10840 .m(2)
10841 .n(4)
10842 .k(k)
10843 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080010844 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010845 }
10846 }
10847
10848 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, no_b_zero_point) {
10849 TEST_REQUIRES_X86_XOP;
10850 for (size_t k = 1; k <= 40; k += 9) {
10851 GemmMicrokernelTester()
10852 .mr(2)
10853 .nr(4)
10854 .kr(2)
10855 .sr(1)
10856 .m(2)
10857 .n(4)
10858 .k(k)
10859 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080010860 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010861 }
10862 }
10863
10864 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, no_zero_point) {
10865 TEST_REQUIRES_X86_XOP;
10866 for (size_t k = 1; k <= 40; k += 9) {
10867 GemmMicrokernelTester()
10868 .mr(2)
10869 .nr(4)
10870 .kr(2)
10871 .sr(1)
10872 .m(2)
10873 .n(4)
10874 .k(k)
10875 .a_zero_point(0)
10876 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080010877 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010878 }
10879 }
10880#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10881
10882
10883#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010884 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8) {
10885 TEST_REQUIRES_X86_XOP;
10886 GemmMicrokernelTester()
10887 .mr(4)
10888 .nr(4)
10889 .kr(2)
10890 .sr(1)
10891 .m(4)
10892 .n(4)
10893 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080010894 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010895 }
10896
10897 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cn) {
10898 TEST_REQUIRES_X86_XOP;
10899 GemmMicrokernelTester()
10900 .mr(4)
10901 .nr(4)
10902 .kr(2)
10903 .sr(1)
10904 .m(4)
10905 .n(4)
10906 .k(8)
10907 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080010908 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010909 }
10910
10911 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile) {
10912 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010913 for (uint32_t n = 1; n <= 4; n++) {
10914 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010915 GemmMicrokernelTester()
10916 .mr(4)
10917 .nr(4)
10918 .kr(2)
10919 .sr(1)
10920 .m(m)
10921 .n(n)
10922 .k(8)
10923 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010924 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010925 }
10926 }
10927 }
10928
10929 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile_m) {
10930 TEST_REQUIRES_X86_XOP;
10931 for (uint32_t m = 1; m <= 4; m++) {
10932 GemmMicrokernelTester()
10933 .mr(4)
10934 .nr(4)
10935 .kr(2)
10936 .sr(1)
10937 .m(m)
10938 .n(4)
10939 .k(8)
10940 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010941 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010942 }
10943 }
10944
10945 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile_n) {
10946 TEST_REQUIRES_X86_XOP;
10947 for (uint32_t n = 1; n <= 4; n++) {
10948 GemmMicrokernelTester()
10949 .mr(4)
10950 .nr(4)
10951 .kr(2)
10952 .sr(1)
10953 .m(4)
10954 .n(n)
10955 .k(8)
10956 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010957 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010958 }
10959 }
10960
10961 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_lt_8) {
10962 TEST_REQUIRES_X86_XOP;
10963 for (size_t k = 1; k < 8; k++) {
10964 GemmMicrokernelTester()
10965 .mr(4)
10966 .nr(4)
10967 .kr(2)
10968 .sr(1)
10969 .m(4)
10970 .n(4)
10971 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010972 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010973 }
10974 }
10975
10976 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_lt_8_subtile) {
10977 TEST_REQUIRES_X86_XOP;
10978 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010979 for (uint32_t n = 1; n <= 4; n++) {
10980 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010981 GemmMicrokernelTester()
10982 .mr(4)
10983 .nr(4)
10984 .kr(2)
10985 .sr(1)
10986 .m(m)
10987 .n(n)
10988 .k(k)
10989 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010990 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070010991 }
10992 }
10993 }
10994 }
10995
10996 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_gt_8) {
10997 TEST_REQUIRES_X86_XOP;
10998 for (size_t k = 9; k < 16; k++) {
10999 GemmMicrokernelTester()
11000 .mr(4)
11001 .nr(4)
11002 .kr(2)
11003 .sr(1)
11004 .m(4)
11005 .n(4)
11006 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011007 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011008 }
11009 }
11010
11011 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_gt_8_subtile) {
11012 TEST_REQUIRES_X86_XOP;
11013 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011014 for (uint32_t n = 1; n <= 4; n++) {
11015 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011016 GemmMicrokernelTester()
11017 .mr(4)
11018 .nr(4)
11019 .kr(2)
11020 .sr(1)
11021 .m(m)
11022 .n(n)
11023 .k(k)
11024 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011025 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011026 }
11027 }
11028 }
11029 }
11030
11031 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_div_8) {
11032 TEST_REQUIRES_X86_XOP;
11033 for (size_t k = 16; k <= 80; k += 8) {
11034 GemmMicrokernelTester()
11035 .mr(4)
11036 .nr(4)
11037 .kr(2)
11038 .sr(1)
11039 .m(4)
11040 .n(4)
11041 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011042 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011043 }
11044 }
11045
11046 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_div_8_subtile) {
11047 TEST_REQUIRES_X86_XOP;
11048 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011049 for (uint32_t n = 1; n <= 4; n++) {
11050 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011051 GemmMicrokernelTester()
11052 .mr(4)
11053 .nr(4)
11054 .kr(2)
11055 .sr(1)
11056 .m(m)
11057 .n(n)
11058 .k(k)
11059 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011060 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011061 }
11062 }
11063 }
11064 }
11065
11066 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4) {
11067 TEST_REQUIRES_X86_XOP;
11068 for (uint32_t n = 5; n < 8; n++) {
11069 for (size_t k = 1; k <= 40; k += 9) {
11070 GemmMicrokernelTester()
11071 .mr(4)
11072 .nr(4)
11073 .kr(2)
11074 .sr(1)
11075 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011076 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011077 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011078 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011079 }
11080 }
11081 }
11082
11083 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_strided_cn) {
11084 TEST_REQUIRES_X86_XOP;
11085 for (uint32_t n = 5; n < 8; n++) {
11086 for (size_t k = 1; k <= 40; k += 9) {
11087 GemmMicrokernelTester()
11088 .mr(4)
11089 .nr(4)
11090 .kr(2)
11091 .sr(1)
11092 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011093 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011094 .k(k)
11095 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080011096 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011097 }
11098 }
11099 }
11100
11101 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_subtile) {
11102 TEST_REQUIRES_X86_XOP;
11103 for (uint32_t n = 5; n < 8; n++) {
11104 for (size_t k = 1; k <= 40; k += 9) {
11105 for (uint32_t m = 1; m <= 4; m++) {
11106 GemmMicrokernelTester()
11107 .mr(4)
11108 .nr(4)
11109 .kr(2)
11110 .sr(1)
11111 .m(m)
11112 .n(n)
11113 .k(k)
11114 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011115 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011116 }
11117 }
11118 }
11119 }
11120
11121 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4) {
11122 TEST_REQUIRES_X86_XOP;
11123 for (uint32_t n = 8; n <= 12; n += 4) {
11124 for (size_t k = 1; k <= 40; k += 9) {
11125 GemmMicrokernelTester()
11126 .mr(4)
11127 .nr(4)
11128 .kr(2)
11129 .sr(1)
11130 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011131 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011132 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011133 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011134 }
11135 }
11136 }
11137
11138 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_strided_cn) {
11139 TEST_REQUIRES_X86_XOP;
11140 for (uint32_t n = 8; n <= 12; n += 4) {
11141 for (size_t k = 1; k <= 40; k += 9) {
11142 GemmMicrokernelTester()
11143 .mr(4)
11144 .nr(4)
11145 .kr(2)
11146 .sr(1)
11147 .m(4)
11148 .n(n)
11149 .k(k)
11150 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080011151 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011152 }
11153 }
11154 }
11155
11156 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_subtile) {
11157 TEST_REQUIRES_X86_XOP;
11158 for (uint32_t n = 8; n <= 12; n += 4) {
11159 for (size_t k = 1; k <= 40; k += 9) {
11160 for (uint32_t m = 1; m <= 4; m++) {
11161 GemmMicrokernelTester()
11162 .mr(4)
11163 .nr(4)
11164 .kr(2)
11165 .sr(1)
11166 .m(m)
11167 .n(n)
11168 .k(k)
11169 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011170 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011171 }
11172 }
11173 }
11174 }
11175
11176 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, small_kernel) {
11177 TEST_REQUIRES_X86_XOP;
11178 for (size_t k = 1; k <= 40; k += 9) {
11179 GemmMicrokernelTester()
11180 .mr(4)
11181 .nr(4)
11182 .kr(2)
11183 .sr(1)
11184 .m(4)
11185 .n(4)
11186 .k(k)
11187 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011188 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011189 }
11190 }
11191
11192 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, small_kernel_subtile) {
11193 TEST_REQUIRES_X86_XOP;
11194 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011195 for (uint32_t n = 1; n <= 4; n++) {
11196 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011197 GemmMicrokernelTester()
11198 .mr(4)
11199 .nr(4)
11200 .kr(2)
11201 .sr(1)
11202 .m(m)
11203 .n(n)
11204 .k(k)
11205 .ks(3)
11206 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011207 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011208 }
11209 }
11210 }
11211 }
11212
11213 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_small_kernel) {
11214 TEST_REQUIRES_X86_XOP;
11215 for (uint32_t n = 5; n < 8; n++) {
11216 for (size_t k = 1; k <= 40; k += 9) {
11217 GemmMicrokernelTester()
11218 .mr(4)
11219 .nr(4)
11220 .kr(2)
11221 .sr(1)
11222 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011223 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011224 .k(k)
11225 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011226 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011227 }
11228 }
11229 }
11230
11231 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_small_kernel) {
11232 TEST_REQUIRES_X86_XOP;
11233 for (uint32_t n = 8; n <= 12; n += 4) {
11234 for (size_t k = 1; k <= 40; k += 9) {
11235 GemmMicrokernelTester()
11236 .mr(4)
11237 .nr(4)
11238 .kr(2)
11239 .sr(1)
11240 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011241 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011242 .k(k)
11243 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011244 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011245 }
11246 }
11247 }
11248
11249 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cm_subtile) {
11250 TEST_REQUIRES_X86_XOP;
11251 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011252 for (uint32_t n = 1; n <= 4; n++) {
11253 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011254 GemmMicrokernelTester()
11255 .mr(4)
11256 .nr(4)
11257 .kr(2)
11258 .sr(1)
11259 .m(m)
11260 .n(n)
11261 .k(k)
11262 .cm_stride(7)
11263 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011264 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011265 }
11266 }
11267 }
11268 }
11269
11270 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, a_offset) {
11271 TEST_REQUIRES_X86_XOP;
11272 for (size_t k = 1; k <= 40; k += 9) {
11273 GemmMicrokernelTester()
11274 .mr(4)
11275 .nr(4)
11276 .kr(2)
11277 .sr(1)
11278 .m(4)
11279 .n(4)
11280 .k(k)
11281 .ks(3)
11282 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080011283 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011284 }
11285 }
11286
11287 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, zero) {
11288 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011289 for (size_t k = 1; k <= 40; k += 9) {
11290 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011291 GemmMicrokernelTester()
11292 .mr(4)
11293 .nr(4)
11294 .kr(2)
11295 .sr(1)
11296 .m(4)
11297 .n(4)
11298 .k(k)
11299 .ks(3)
11300 .a_offset(163)
11301 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080011302 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011303 }
11304 }
11305 }
11306
11307 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, qmin) {
11308 TEST_REQUIRES_X86_XOP;
11309 GemmMicrokernelTester()
11310 .mr(4)
11311 .nr(4)
11312 .kr(2)
11313 .sr(1)
11314 .m(4)
11315 .n(4)
11316 .k(8)
11317 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011318 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011319 }
11320
11321 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, qmax) {
11322 TEST_REQUIRES_X86_XOP;
11323 GemmMicrokernelTester()
11324 .mr(4)
11325 .nr(4)
11326 .kr(2)
11327 .sr(1)
11328 .m(4)
11329 .n(4)
11330 .k(8)
11331 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011332 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011333 }
11334
11335 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cm) {
11336 TEST_REQUIRES_X86_XOP;
11337 GemmMicrokernelTester()
11338 .mr(4)
11339 .nr(4)
11340 .kr(2)
11341 .sr(1)
11342 .m(4)
11343 .n(4)
11344 .k(8)
11345 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080011346 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011347 }
11348
11349 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, no_a_zero_point) {
11350 TEST_REQUIRES_X86_XOP;
11351 for (size_t k = 1; k <= 40; k += 9) {
11352 GemmMicrokernelTester()
11353 .mr(4)
11354 .nr(4)
11355 .kr(2)
11356 .sr(1)
11357 .m(4)
11358 .n(4)
11359 .k(k)
11360 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080011361 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011362 }
11363 }
11364
11365 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, no_b_zero_point) {
11366 TEST_REQUIRES_X86_XOP;
11367 for (size_t k = 1; k <= 40; k += 9) {
11368 GemmMicrokernelTester()
11369 .mr(4)
11370 .nr(4)
11371 .kr(2)
11372 .sr(1)
11373 .m(4)
11374 .n(4)
11375 .k(k)
11376 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080011377 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011378 }
11379 }
11380
11381 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, no_zero_point) {
11382 TEST_REQUIRES_X86_XOP;
11383 for (size_t k = 1; k <= 40; k += 9) {
11384 GemmMicrokernelTester()
11385 .mr(4)
11386 .nr(4)
11387 .kr(2)
11388 .sr(1)
11389 .m(4)
11390 .n(4)
11391 .k(k)
11392 .a_zero_point(0)
11393 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080011394 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011395 }
11396 }
11397#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
11398
11399
11400#if XNN_ARCH_X86 || XNN_ARCH_X86_64
11401 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8) {
11402 TEST_REQUIRES_X86_SSE2;
11403 GemmMicrokernelTester()
11404 .mr(1)
11405 .nr(4)
11406 .kr(8)
11407 .sr(1)
11408 .m(1)
11409 .n(4)
11410 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080011411 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011412 }
11413
11414 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cn) {
11415 TEST_REQUIRES_X86_SSE2;
11416 GemmMicrokernelTester()
11417 .mr(1)
11418 .nr(4)
11419 .kr(8)
11420 .sr(1)
11421 .m(1)
11422 .n(4)
11423 .k(8)
11424 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080011425 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011426 }
11427
11428 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile) {
11429 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011430 for (uint32_t n = 1; n <= 4; n++) {
11431 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011432 GemmMicrokernelTester()
11433 .mr(1)
11434 .nr(4)
11435 .kr(8)
11436 .sr(1)
11437 .m(m)
11438 .n(n)
11439 .k(8)
11440 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011441 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011442 }
11443 }
11444 }
11445
11446 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile_m) {
11447 TEST_REQUIRES_X86_SSE2;
11448 for (uint32_t m = 1; m <= 1; m++) {
11449 GemmMicrokernelTester()
11450 .mr(1)
11451 .nr(4)
11452 .kr(8)
11453 .sr(1)
11454 .m(m)
11455 .n(4)
11456 .k(8)
11457 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011458 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011459 }
11460 }
11461
11462 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile_n) {
11463 TEST_REQUIRES_X86_SSE2;
11464 for (uint32_t n = 1; n <= 4; n++) {
11465 GemmMicrokernelTester()
11466 .mr(1)
11467 .nr(4)
11468 .kr(8)
11469 .sr(1)
11470 .m(1)
11471 .n(n)
11472 .k(8)
11473 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011474 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011475 }
11476 }
11477
11478 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_lt_8) {
11479 TEST_REQUIRES_X86_SSE2;
11480 for (size_t k = 1; k < 8; k++) {
11481 GemmMicrokernelTester()
11482 .mr(1)
11483 .nr(4)
11484 .kr(8)
11485 .sr(1)
11486 .m(1)
11487 .n(4)
11488 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011489 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011490 }
11491 }
11492
11493 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_lt_8_subtile) {
11494 TEST_REQUIRES_X86_SSE2;
11495 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011496 for (uint32_t n = 1; n <= 4; n++) {
11497 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011498 GemmMicrokernelTester()
11499 .mr(1)
11500 .nr(4)
11501 .kr(8)
11502 .sr(1)
11503 .m(m)
11504 .n(n)
11505 .k(k)
11506 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011507 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011508 }
11509 }
11510 }
11511 }
11512
11513 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_gt_8) {
11514 TEST_REQUIRES_X86_SSE2;
11515 for (size_t k = 9; k < 16; k++) {
11516 GemmMicrokernelTester()
11517 .mr(1)
11518 .nr(4)
11519 .kr(8)
11520 .sr(1)
11521 .m(1)
11522 .n(4)
11523 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011524 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011525 }
11526 }
11527
11528 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_gt_8_subtile) {
11529 TEST_REQUIRES_X86_SSE2;
11530 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011531 for (uint32_t n = 1; n <= 4; n++) {
11532 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011533 GemmMicrokernelTester()
11534 .mr(1)
11535 .nr(4)
11536 .kr(8)
11537 .sr(1)
11538 .m(m)
11539 .n(n)
11540 .k(k)
11541 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011542 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011543 }
11544 }
11545 }
11546 }
11547
11548 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_div_8) {
11549 TEST_REQUIRES_X86_SSE2;
11550 for (size_t k = 16; k <= 80; k += 8) {
11551 GemmMicrokernelTester()
11552 .mr(1)
11553 .nr(4)
11554 .kr(8)
11555 .sr(1)
11556 .m(1)
11557 .n(4)
11558 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011559 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011560 }
11561 }
11562
11563 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_div_8_subtile) {
11564 TEST_REQUIRES_X86_SSE2;
11565 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011566 for (uint32_t n = 1; n <= 4; n++) {
11567 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011568 GemmMicrokernelTester()
11569 .mr(1)
11570 .nr(4)
11571 .kr(8)
11572 .sr(1)
11573 .m(m)
11574 .n(n)
11575 .k(k)
11576 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011577 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011578 }
11579 }
11580 }
11581 }
11582
11583 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4) {
11584 TEST_REQUIRES_X86_SSE2;
11585 for (uint32_t n = 5; n < 8; n++) {
11586 for (size_t k = 1; k <= 40; k += 9) {
11587 GemmMicrokernelTester()
11588 .mr(1)
11589 .nr(4)
11590 .kr(8)
11591 .sr(1)
11592 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011593 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011594 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011595 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011596 }
11597 }
11598 }
11599
11600 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_strided_cn) {
11601 TEST_REQUIRES_X86_SSE2;
11602 for (uint32_t n = 5; n < 8; n++) {
11603 for (size_t k = 1; k <= 40; k += 9) {
11604 GemmMicrokernelTester()
11605 .mr(1)
11606 .nr(4)
11607 .kr(8)
11608 .sr(1)
11609 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011610 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011611 .k(k)
11612 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080011613 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011614 }
11615 }
11616 }
11617
11618 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_subtile) {
11619 TEST_REQUIRES_X86_SSE2;
11620 for (uint32_t n = 5; n < 8; n++) {
11621 for (size_t k = 1; k <= 40; k += 9) {
11622 for (uint32_t m = 1; m <= 1; m++) {
11623 GemmMicrokernelTester()
11624 .mr(1)
11625 .nr(4)
11626 .kr(8)
11627 .sr(1)
11628 .m(m)
11629 .n(n)
11630 .k(k)
11631 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011632 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011633 }
11634 }
11635 }
11636 }
11637
11638 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4) {
11639 TEST_REQUIRES_X86_SSE2;
11640 for (uint32_t n = 8; n <= 12; n += 4) {
11641 for (size_t k = 1; k <= 40; k += 9) {
11642 GemmMicrokernelTester()
11643 .mr(1)
11644 .nr(4)
11645 .kr(8)
11646 .sr(1)
11647 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011648 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011649 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011650 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011651 }
11652 }
11653 }
11654
11655 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_strided_cn) {
11656 TEST_REQUIRES_X86_SSE2;
11657 for (uint32_t n = 8; n <= 12; n += 4) {
11658 for (size_t k = 1; k <= 40; k += 9) {
11659 GemmMicrokernelTester()
11660 .mr(1)
11661 .nr(4)
11662 .kr(8)
11663 .sr(1)
11664 .m(1)
11665 .n(n)
11666 .k(k)
11667 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080011668 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011669 }
11670 }
11671 }
11672
11673 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_subtile) {
11674 TEST_REQUIRES_X86_SSE2;
11675 for (uint32_t n = 8; n <= 12; n += 4) {
11676 for (size_t k = 1; k <= 40; k += 9) {
11677 for (uint32_t m = 1; m <= 1; m++) {
11678 GemmMicrokernelTester()
11679 .mr(1)
11680 .nr(4)
11681 .kr(8)
11682 .sr(1)
11683 .m(m)
11684 .n(n)
11685 .k(k)
11686 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011687 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011688 }
11689 }
11690 }
11691 }
11692
11693 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, small_kernel) {
11694 TEST_REQUIRES_X86_SSE2;
11695 for (size_t k = 1; k <= 40; k += 9) {
11696 GemmMicrokernelTester()
11697 .mr(1)
11698 .nr(4)
11699 .kr(8)
11700 .sr(1)
11701 .m(1)
11702 .n(4)
11703 .k(k)
11704 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011705 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011706 }
11707 }
11708
11709 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, small_kernel_subtile) {
11710 TEST_REQUIRES_X86_SSE2;
11711 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011712 for (uint32_t n = 1; n <= 4; n++) {
11713 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011714 GemmMicrokernelTester()
11715 .mr(1)
11716 .nr(4)
11717 .kr(8)
11718 .sr(1)
11719 .m(m)
11720 .n(n)
11721 .k(k)
11722 .ks(3)
11723 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011724 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011725 }
11726 }
11727 }
11728 }
11729
11730 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_small_kernel) {
11731 TEST_REQUIRES_X86_SSE2;
11732 for (uint32_t n = 5; n < 8; n++) {
11733 for (size_t k = 1; k <= 40; k += 9) {
11734 GemmMicrokernelTester()
11735 .mr(1)
11736 .nr(4)
11737 .kr(8)
11738 .sr(1)
11739 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011740 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011741 .k(k)
11742 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011743 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011744 }
11745 }
11746 }
11747
11748 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_small_kernel) {
11749 TEST_REQUIRES_X86_SSE2;
11750 for (uint32_t n = 8; n <= 12; n += 4) {
11751 for (size_t k = 1; k <= 40; k += 9) {
11752 GemmMicrokernelTester()
11753 .mr(1)
11754 .nr(4)
11755 .kr(8)
11756 .sr(1)
11757 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011758 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011759 .k(k)
11760 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011761 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011762 }
11763 }
11764 }
11765
11766 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cm_subtile) {
11767 TEST_REQUIRES_X86_SSE2;
11768 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011769 for (uint32_t n = 1; n <= 4; n++) {
11770 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011771 GemmMicrokernelTester()
11772 .mr(1)
11773 .nr(4)
11774 .kr(8)
11775 .sr(1)
11776 .m(m)
11777 .n(n)
11778 .k(k)
11779 .cm_stride(7)
11780 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011781 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011782 }
11783 }
11784 }
11785 }
11786
11787 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, a_offset) {
11788 TEST_REQUIRES_X86_SSE2;
11789 for (size_t k = 1; k <= 40; k += 9) {
11790 GemmMicrokernelTester()
11791 .mr(1)
11792 .nr(4)
11793 .kr(8)
11794 .sr(1)
11795 .m(1)
11796 .n(4)
11797 .k(k)
11798 .ks(3)
11799 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080011800 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011801 }
11802 }
11803
11804 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, zero) {
11805 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011806 for (size_t k = 1; k <= 40; k += 9) {
11807 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011808 GemmMicrokernelTester()
11809 .mr(1)
11810 .nr(4)
11811 .kr(8)
11812 .sr(1)
11813 .m(1)
11814 .n(4)
11815 .k(k)
11816 .ks(3)
11817 .a_offset(43)
11818 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080011819 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011820 }
11821 }
11822 }
11823
11824 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, qmin) {
11825 TEST_REQUIRES_X86_SSE2;
11826 GemmMicrokernelTester()
11827 .mr(1)
11828 .nr(4)
11829 .kr(8)
11830 .sr(1)
11831 .m(1)
11832 .n(4)
11833 .k(8)
11834 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011835 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011836 }
11837
11838 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, qmax) {
11839 TEST_REQUIRES_X86_SSE2;
11840 GemmMicrokernelTester()
11841 .mr(1)
11842 .nr(4)
11843 .kr(8)
11844 .sr(1)
11845 .m(1)
11846 .n(4)
11847 .k(8)
11848 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011849 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011850 }
11851
11852 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cm) {
11853 TEST_REQUIRES_X86_SSE2;
11854 GemmMicrokernelTester()
11855 .mr(1)
11856 .nr(4)
11857 .kr(8)
11858 .sr(1)
11859 .m(1)
11860 .n(4)
11861 .k(8)
11862 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080011863 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011864 }
11865
11866 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, no_a_zero_point) {
11867 TEST_REQUIRES_X86_SSE2;
11868 for (size_t k = 1; k <= 40; k += 9) {
11869 GemmMicrokernelTester()
11870 .mr(1)
11871 .nr(4)
11872 .kr(8)
11873 .sr(1)
11874 .m(1)
11875 .n(4)
11876 .k(k)
11877 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080011878 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011879 }
11880 }
11881
11882 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, no_b_zero_point) {
11883 TEST_REQUIRES_X86_SSE2;
11884 for (size_t k = 1; k <= 40; k += 9) {
11885 GemmMicrokernelTester()
11886 .mr(1)
11887 .nr(4)
11888 .kr(8)
11889 .sr(1)
11890 .m(1)
11891 .n(4)
11892 .k(k)
11893 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080011894 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011895 }
11896 }
11897
11898 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, no_zero_point) {
11899 TEST_REQUIRES_X86_SSE2;
11900 for (size_t k = 1; k <= 40; k += 9) {
11901 GemmMicrokernelTester()
11902 .mr(1)
11903 .nr(4)
11904 .kr(8)
11905 .sr(1)
11906 .m(1)
11907 .n(4)
11908 .k(k)
11909 .a_zero_point(0)
11910 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080011911 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011912 }
11913 }
11914#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
11915
11916
11917#if XNN_ARCH_X86 || XNN_ARCH_X86_64
11918 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8) {
11919 TEST_REQUIRES_X86_SSE2;
11920 GemmMicrokernelTester()
11921 .mr(2)
11922 .nr(4)
11923 .kr(8)
11924 .sr(1)
11925 .m(2)
11926 .n(4)
11927 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080011928 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011929 }
11930
11931 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cn) {
11932 TEST_REQUIRES_X86_SSE2;
11933 GemmMicrokernelTester()
11934 .mr(2)
11935 .nr(4)
11936 .kr(8)
11937 .sr(1)
11938 .m(2)
11939 .n(4)
11940 .k(8)
11941 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080011942 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011943 }
11944
11945 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile) {
11946 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011947 for (uint32_t n = 1; n <= 4; n++) {
11948 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011949 GemmMicrokernelTester()
11950 .mr(2)
11951 .nr(4)
11952 .kr(8)
11953 .sr(1)
11954 .m(m)
11955 .n(n)
11956 .k(8)
11957 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011958 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011959 }
11960 }
11961 }
11962
11963 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile_m) {
11964 TEST_REQUIRES_X86_SSE2;
11965 for (uint32_t m = 1; m <= 2; m++) {
11966 GemmMicrokernelTester()
11967 .mr(2)
11968 .nr(4)
11969 .kr(8)
11970 .sr(1)
11971 .m(m)
11972 .n(4)
11973 .k(8)
11974 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011975 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011976 }
11977 }
11978
11979 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile_n) {
11980 TEST_REQUIRES_X86_SSE2;
11981 for (uint32_t n = 1; n <= 4; n++) {
11982 GemmMicrokernelTester()
11983 .mr(2)
11984 .nr(4)
11985 .kr(8)
11986 .sr(1)
11987 .m(2)
11988 .n(n)
11989 .k(8)
11990 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011991 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070011992 }
11993 }
11994
11995 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_lt_8) {
11996 TEST_REQUIRES_X86_SSE2;
11997 for (size_t k = 1; k < 8; k++) {
11998 GemmMicrokernelTester()
11999 .mr(2)
12000 .nr(4)
12001 .kr(8)
12002 .sr(1)
12003 .m(2)
12004 .n(4)
12005 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012006 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012007 }
12008 }
12009
12010 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_lt_8_subtile) {
12011 TEST_REQUIRES_X86_SSE2;
12012 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012013 for (uint32_t n = 1; n <= 4; n++) {
12014 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012015 GemmMicrokernelTester()
12016 .mr(2)
12017 .nr(4)
12018 .kr(8)
12019 .sr(1)
12020 .m(m)
12021 .n(n)
12022 .k(k)
12023 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012024 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012025 }
12026 }
12027 }
12028 }
12029
12030 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_gt_8) {
12031 TEST_REQUIRES_X86_SSE2;
12032 for (size_t k = 9; k < 16; k++) {
12033 GemmMicrokernelTester()
12034 .mr(2)
12035 .nr(4)
12036 .kr(8)
12037 .sr(1)
12038 .m(2)
12039 .n(4)
12040 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012041 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012042 }
12043 }
12044
12045 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_gt_8_subtile) {
12046 TEST_REQUIRES_X86_SSE2;
12047 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012048 for (uint32_t n = 1; n <= 4; n++) {
12049 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012050 GemmMicrokernelTester()
12051 .mr(2)
12052 .nr(4)
12053 .kr(8)
12054 .sr(1)
12055 .m(m)
12056 .n(n)
12057 .k(k)
12058 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012059 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012060 }
12061 }
12062 }
12063 }
12064
12065 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_div_8) {
12066 TEST_REQUIRES_X86_SSE2;
12067 for (size_t k = 16; k <= 80; k += 8) {
12068 GemmMicrokernelTester()
12069 .mr(2)
12070 .nr(4)
12071 .kr(8)
12072 .sr(1)
12073 .m(2)
12074 .n(4)
12075 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012076 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012077 }
12078 }
12079
12080 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_div_8_subtile) {
12081 TEST_REQUIRES_X86_SSE2;
12082 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012083 for (uint32_t n = 1; n <= 4; n++) {
12084 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012085 GemmMicrokernelTester()
12086 .mr(2)
12087 .nr(4)
12088 .kr(8)
12089 .sr(1)
12090 .m(m)
12091 .n(n)
12092 .k(k)
12093 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012094 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012095 }
12096 }
12097 }
12098 }
12099
12100 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4) {
12101 TEST_REQUIRES_X86_SSE2;
12102 for (uint32_t n = 5; n < 8; n++) {
12103 for (size_t k = 1; k <= 40; k += 9) {
12104 GemmMicrokernelTester()
12105 .mr(2)
12106 .nr(4)
12107 .kr(8)
12108 .sr(1)
12109 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012110 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012111 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012112 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012113 }
12114 }
12115 }
12116
12117 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_strided_cn) {
12118 TEST_REQUIRES_X86_SSE2;
12119 for (uint32_t n = 5; n < 8; n++) {
12120 for (size_t k = 1; k <= 40; k += 9) {
12121 GemmMicrokernelTester()
12122 .mr(2)
12123 .nr(4)
12124 .kr(8)
12125 .sr(1)
12126 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012127 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012128 .k(k)
12129 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080012130 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012131 }
12132 }
12133 }
12134
12135 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_subtile) {
12136 TEST_REQUIRES_X86_SSE2;
12137 for (uint32_t n = 5; n < 8; n++) {
12138 for (size_t k = 1; k <= 40; k += 9) {
12139 for (uint32_t m = 1; m <= 2; m++) {
12140 GemmMicrokernelTester()
12141 .mr(2)
12142 .nr(4)
12143 .kr(8)
12144 .sr(1)
12145 .m(m)
12146 .n(n)
12147 .k(k)
12148 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012149 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012150 }
12151 }
12152 }
12153 }
12154
12155 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4) {
12156 TEST_REQUIRES_X86_SSE2;
12157 for (uint32_t n = 8; n <= 12; n += 4) {
12158 for (size_t k = 1; k <= 40; k += 9) {
12159 GemmMicrokernelTester()
12160 .mr(2)
12161 .nr(4)
12162 .kr(8)
12163 .sr(1)
12164 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012165 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012166 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012167 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012168 }
12169 }
12170 }
12171
12172 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_strided_cn) {
12173 TEST_REQUIRES_X86_SSE2;
12174 for (uint32_t n = 8; n <= 12; n += 4) {
12175 for (size_t k = 1; k <= 40; k += 9) {
12176 GemmMicrokernelTester()
12177 .mr(2)
12178 .nr(4)
12179 .kr(8)
12180 .sr(1)
12181 .m(2)
12182 .n(n)
12183 .k(k)
12184 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080012185 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012186 }
12187 }
12188 }
12189
12190 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_subtile) {
12191 TEST_REQUIRES_X86_SSE2;
12192 for (uint32_t n = 8; n <= 12; n += 4) {
12193 for (size_t k = 1; k <= 40; k += 9) {
12194 for (uint32_t m = 1; m <= 2; m++) {
12195 GemmMicrokernelTester()
12196 .mr(2)
12197 .nr(4)
12198 .kr(8)
12199 .sr(1)
12200 .m(m)
12201 .n(n)
12202 .k(k)
12203 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012204 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012205 }
12206 }
12207 }
12208 }
12209
12210 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, small_kernel) {
12211 TEST_REQUIRES_X86_SSE2;
12212 for (size_t k = 1; k <= 40; k += 9) {
12213 GemmMicrokernelTester()
12214 .mr(2)
12215 .nr(4)
12216 .kr(8)
12217 .sr(1)
12218 .m(2)
12219 .n(4)
12220 .k(k)
12221 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012222 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012223 }
12224 }
12225
12226 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, small_kernel_subtile) {
12227 TEST_REQUIRES_X86_SSE2;
12228 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012229 for (uint32_t n = 1; n <= 4; n++) {
12230 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012231 GemmMicrokernelTester()
12232 .mr(2)
12233 .nr(4)
12234 .kr(8)
12235 .sr(1)
12236 .m(m)
12237 .n(n)
12238 .k(k)
12239 .ks(3)
12240 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012241 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012242 }
12243 }
12244 }
12245 }
12246
12247 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_small_kernel) {
12248 TEST_REQUIRES_X86_SSE2;
12249 for (uint32_t n = 5; n < 8; n++) {
12250 for (size_t k = 1; k <= 40; k += 9) {
12251 GemmMicrokernelTester()
12252 .mr(2)
12253 .nr(4)
12254 .kr(8)
12255 .sr(1)
12256 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012257 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012258 .k(k)
12259 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012260 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012261 }
12262 }
12263 }
12264
12265 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_small_kernel) {
12266 TEST_REQUIRES_X86_SSE2;
12267 for (uint32_t n = 8; n <= 12; n += 4) {
12268 for (size_t k = 1; k <= 40; k += 9) {
12269 GemmMicrokernelTester()
12270 .mr(2)
12271 .nr(4)
12272 .kr(8)
12273 .sr(1)
12274 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012275 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012276 .k(k)
12277 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012278 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012279 }
12280 }
12281 }
12282
12283 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cm_subtile) {
12284 TEST_REQUIRES_X86_SSE2;
12285 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012286 for (uint32_t n = 1; n <= 4; n++) {
12287 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012288 GemmMicrokernelTester()
12289 .mr(2)
12290 .nr(4)
12291 .kr(8)
12292 .sr(1)
12293 .m(m)
12294 .n(n)
12295 .k(k)
12296 .cm_stride(7)
12297 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012298 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012299 }
12300 }
12301 }
12302 }
12303
12304 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, a_offset) {
12305 TEST_REQUIRES_X86_SSE2;
12306 for (size_t k = 1; k <= 40; k += 9) {
12307 GemmMicrokernelTester()
12308 .mr(2)
12309 .nr(4)
12310 .kr(8)
12311 .sr(1)
12312 .m(2)
12313 .n(4)
12314 .k(k)
12315 .ks(3)
12316 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080012317 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012318 }
12319 }
12320
12321 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, zero) {
12322 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012323 for (size_t k = 1; k <= 40; k += 9) {
12324 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012325 GemmMicrokernelTester()
12326 .mr(2)
12327 .nr(4)
12328 .kr(8)
12329 .sr(1)
12330 .m(2)
12331 .n(4)
12332 .k(k)
12333 .ks(3)
12334 .a_offset(83)
12335 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080012336 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012337 }
12338 }
12339 }
12340
12341 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, qmin) {
12342 TEST_REQUIRES_X86_SSE2;
12343 GemmMicrokernelTester()
12344 .mr(2)
12345 .nr(4)
12346 .kr(8)
12347 .sr(1)
12348 .m(2)
12349 .n(4)
12350 .k(8)
12351 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012352 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012353 }
12354
12355 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, qmax) {
12356 TEST_REQUIRES_X86_SSE2;
12357 GemmMicrokernelTester()
12358 .mr(2)
12359 .nr(4)
12360 .kr(8)
12361 .sr(1)
12362 .m(2)
12363 .n(4)
12364 .k(8)
12365 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012366 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012367 }
12368
12369 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cm) {
12370 TEST_REQUIRES_X86_SSE2;
12371 GemmMicrokernelTester()
12372 .mr(2)
12373 .nr(4)
12374 .kr(8)
12375 .sr(1)
12376 .m(2)
12377 .n(4)
12378 .k(8)
12379 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080012380 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012381 }
12382
12383 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, no_a_zero_point) {
12384 TEST_REQUIRES_X86_SSE2;
12385 for (size_t k = 1; k <= 40; k += 9) {
12386 GemmMicrokernelTester()
12387 .mr(2)
12388 .nr(4)
12389 .kr(8)
12390 .sr(1)
12391 .m(2)
12392 .n(4)
12393 .k(k)
12394 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080012395 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012396 }
12397 }
12398
12399 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, no_b_zero_point) {
12400 TEST_REQUIRES_X86_SSE2;
12401 for (size_t k = 1; k <= 40; k += 9) {
12402 GemmMicrokernelTester()
12403 .mr(2)
12404 .nr(4)
12405 .kr(8)
12406 .sr(1)
12407 .m(2)
12408 .n(4)
12409 .k(k)
12410 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080012411 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012412 }
12413 }
12414
12415 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, no_zero_point) {
12416 TEST_REQUIRES_X86_SSE2;
12417 for (size_t k = 1; k <= 40; k += 9) {
12418 GemmMicrokernelTester()
12419 .mr(2)
12420 .nr(4)
12421 .kr(8)
12422 .sr(1)
12423 .m(2)
12424 .n(4)
12425 .k(k)
12426 .a_zero_point(0)
12427 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080012428 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012429 }
12430 }
12431#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12432
12433
12434#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012435 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8) {
12436 TEST_REQUIRES_X86_SSE41;
12437 GemmMicrokernelTester()
12438 .mr(1)
12439 .nr(4)
12440 .kr(8)
12441 .sr(1)
12442 .m(1)
12443 .n(4)
12444 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080012445 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012446 }
12447
12448 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, strided_cn) {
12449 TEST_REQUIRES_X86_SSE41;
12450 GemmMicrokernelTester()
12451 .mr(1)
12452 .nr(4)
12453 .kr(8)
12454 .sr(1)
12455 .m(1)
12456 .n(4)
12457 .k(8)
12458 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080012459 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012460 }
12461
12462 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8_subtile) {
12463 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012464 for (uint32_t n = 1; n <= 4; n++) {
12465 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012466 GemmMicrokernelTester()
12467 .mr(1)
12468 .nr(4)
12469 .kr(8)
12470 .sr(1)
12471 .m(m)
12472 .n(n)
12473 .k(8)
12474 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012475 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012476 }
12477 }
12478 }
12479
12480 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8_subtile_m) {
12481 TEST_REQUIRES_X86_SSE41;
12482 for (uint32_t m = 1; m <= 1; m++) {
12483 GemmMicrokernelTester()
12484 .mr(1)
12485 .nr(4)
12486 .kr(8)
12487 .sr(1)
12488 .m(m)
12489 .n(4)
12490 .k(8)
12491 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012492 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012493 }
12494 }
12495
12496 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8_subtile_n) {
12497 TEST_REQUIRES_X86_SSE41;
12498 for (uint32_t n = 1; n <= 4; n++) {
12499 GemmMicrokernelTester()
12500 .mr(1)
12501 .nr(4)
12502 .kr(8)
12503 .sr(1)
12504 .m(1)
12505 .n(n)
12506 .k(8)
12507 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012508 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012509 }
12510 }
12511
12512 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_lt_8) {
12513 TEST_REQUIRES_X86_SSE41;
12514 for (size_t k = 1; k < 8; k++) {
12515 GemmMicrokernelTester()
12516 .mr(1)
12517 .nr(4)
12518 .kr(8)
12519 .sr(1)
12520 .m(1)
12521 .n(4)
12522 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012523 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012524 }
12525 }
12526
12527 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_lt_8_subtile) {
12528 TEST_REQUIRES_X86_SSE41;
12529 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012530 for (uint32_t n = 1; n <= 4; n++) {
12531 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012532 GemmMicrokernelTester()
12533 .mr(1)
12534 .nr(4)
12535 .kr(8)
12536 .sr(1)
12537 .m(m)
12538 .n(n)
12539 .k(k)
12540 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012541 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012542 }
12543 }
12544 }
12545 }
12546
12547 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_gt_8) {
12548 TEST_REQUIRES_X86_SSE41;
12549 for (size_t k = 9; k < 16; k++) {
12550 GemmMicrokernelTester()
12551 .mr(1)
12552 .nr(4)
12553 .kr(8)
12554 .sr(1)
12555 .m(1)
12556 .n(4)
12557 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012558 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012559 }
12560 }
12561
12562 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_gt_8_subtile) {
12563 TEST_REQUIRES_X86_SSE41;
12564 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012565 for (uint32_t n = 1; n <= 4; n++) {
12566 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012567 GemmMicrokernelTester()
12568 .mr(1)
12569 .nr(4)
12570 .kr(8)
12571 .sr(1)
12572 .m(m)
12573 .n(n)
12574 .k(k)
12575 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012576 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012577 }
12578 }
12579 }
12580 }
12581
12582 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_div_8) {
12583 TEST_REQUIRES_X86_SSE41;
12584 for (size_t k = 16; k <= 80; k += 8) {
12585 GemmMicrokernelTester()
12586 .mr(1)
12587 .nr(4)
12588 .kr(8)
12589 .sr(1)
12590 .m(1)
12591 .n(4)
12592 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012593 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012594 }
12595 }
12596
12597 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_div_8_subtile) {
12598 TEST_REQUIRES_X86_SSE41;
12599 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012600 for (uint32_t n = 1; n <= 4; n++) {
12601 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012602 GemmMicrokernelTester()
12603 .mr(1)
12604 .nr(4)
12605 .kr(8)
12606 .sr(1)
12607 .m(m)
12608 .n(n)
12609 .k(k)
12610 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012611 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012612 }
12613 }
12614 }
12615 }
12616
12617 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4) {
12618 TEST_REQUIRES_X86_SSE41;
12619 for (uint32_t n = 5; n < 8; n++) {
12620 for (size_t k = 1; k <= 40; k += 9) {
12621 GemmMicrokernelTester()
12622 .mr(1)
12623 .nr(4)
12624 .kr(8)
12625 .sr(1)
12626 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012627 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012628 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012629 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012630 }
12631 }
12632 }
12633
12634 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4_strided_cn) {
12635 TEST_REQUIRES_X86_SSE41;
12636 for (uint32_t n = 5; n < 8; n++) {
12637 for (size_t k = 1; k <= 40; k += 9) {
12638 GemmMicrokernelTester()
12639 .mr(1)
12640 .nr(4)
12641 .kr(8)
12642 .sr(1)
12643 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012644 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012645 .k(k)
12646 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080012647 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012648 }
12649 }
12650 }
12651
12652 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4_subtile) {
12653 TEST_REQUIRES_X86_SSE41;
12654 for (uint32_t n = 5; n < 8; n++) {
12655 for (size_t k = 1; k <= 40; k += 9) {
12656 for (uint32_t m = 1; m <= 1; m++) {
12657 GemmMicrokernelTester()
12658 .mr(1)
12659 .nr(4)
12660 .kr(8)
12661 .sr(1)
12662 .m(m)
12663 .n(n)
12664 .k(k)
12665 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012666 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012667 }
12668 }
12669 }
12670 }
12671
12672 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4) {
12673 TEST_REQUIRES_X86_SSE41;
12674 for (uint32_t n = 8; n <= 12; n += 4) {
12675 for (size_t k = 1; k <= 40; k += 9) {
12676 GemmMicrokernelTester()
12677 .mr(1)
12678 .nr(4)
12679 .kr(8)
12680 .sr(1)
12681 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012682 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012683 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012684 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012685 }
12686 }
12687 }
12688
12689 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4_strided_cn) {
12690 TEST_REQUIRES_X86_SSE41;
12691 for (uint32_t n = 8; n <= 12; n += 4) {
12692 for (size_t k = 1; k <= 40; k += 9) {
12693 GemmMicrokernelTester()
12694 .mr(1)
12695 .nr(4)
12696 .kr(8)
12697 .sr(1)
12698 .m(1)
12699 .n(n)
12700 .k(k)
12701 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080012702 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012703 }
12704 }
12705 }
12706
12707 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4_subtile) {
12708 TEST_REQUIRES_X86_SSE41;
12709 for (uint32_t n = 8; n <= 12; n += 4) {
12710 for (size_t k = 1; k <= 40; k += 9) {
12711 for (uint32_t m = 1; m <= 1; m++) {
12712 GemmMicrokernelTester()
12713 .mr(1)
12714 .nr(4)
12715 .kr(8)
12716 .sr(1)
12717 .m(m)
12718 .n(n)
12719 .k(k)
12720 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012721 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012722 }
12723 }
12724 }
12725 }
12726
12727 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, small_kernel) {
12728 TEST_REQUIRES_X86_SSE41;
12729 for (size_t k = 1; k <= 40; k += 9) {
12730 GemmMicrokernelTester()
12731 .mr(1)
12732 .nr(4)
12733 .kr(8)
12734 .sr(1)
12735 .m(1)
12736 .n(4)
12737 .k(k)
12738 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012739 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012740 }
12741 }
12742
12743 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, small_kernel_subtile) {
12744 TEST_REQUIRES_X86_SSE41;
12745 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012746 for (uint32_t n = 1; n <= 4; n++) {
12747 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012748 GemmMicrokernelTester()
12749 .mr(1)
12750 .nr(4)
12751 .kr(8)
12752 .sr(1)
12753 .m(m)
12754 .n(n)
12755 .k(k)
12756 .ks(3)
12757 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012758 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012759 }
12760 }
12761 }
12762 }
12763
12764 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4_small_kernel) {
12765 TEST_REQUIRES_X86_SSE41;
12766 for (uint32_t n = 5; n < 8; n++) {
12767 for (size_t k = 1; k <= 40; k += 9) {
12768 GemmMicrokernelTester()
12769 .mr(1)
12770 .nr(4)
12771 .kr(8)
12772 .sr(1)
12773 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012774 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012775 .k(k)
12776 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012777 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012778 }
12779 }
12780 }
12781
12782 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4_small_kernel) {
12783 TEST_REQUIRES_X86_SSE41;
12784 for (uint32_t n = 8; n <= 12; n += 4) {
12785 for (size_t k = 1; k <= 40; k += 9) {
12786 GemmMicrokernelTester()
12787 .mr(1)
12788 .nr(4)
12789 .kr(8)
12790 .sr(1)
12791 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012792 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012793 .k(k)
12794 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012795 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012796 }
12797 }
12798 }
12799
12800 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, strided_cm_subtile) {
12801 TEST_REQUIRES_X86_SSE41;
12802 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012803 for (uint32_t n = 1; n <= 4; n++) {
12804 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012805 GemmMicrokernelTester()
12806 .mr(1)
12807 .nr(4)
12808 .kr(8)
12809 .sr(1)
12810 .m(m)
12811 .n(n)
12812 .k(k)
12813 .cm_stride(7)
12814 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012815 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012816 }
12817 }
12818 }
12819 }
12820
12821 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, a_offset) {
12822 TEST_REQUIRES_X86_SSE41;
12823 for (size_t k = 1; k <= 40; k += 9) {
12824 GemmMicrokernelTester()
12825 .mr(1)
12826 .nr(4)
12827 .kr(8)
12828 .sr(1)
12829 .m(1)
12830 .n(4)
12831 .k(k)
12832 .ks(3)
12833 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080012834 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012835 }
12836 }
12837
12838 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, zero) {
12839 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012840 for (size_t k = 1; k <= 40; k += 9) {
12841 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012842 GemmMicrokernelTester()
12843 .mr(1)
12844 .nr(4)
12845 .kr(8)
12846 .sr(1)
12847 .m(1)
12848 .n(4)
12849 .k(k)
12850 .ks(3)
12851 .a_offset(43)
12852 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080012853 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012854 }
12855 }
12856 }
12857
12858 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, qmin) {
12859 TEST_REQUIRES_X86_SSE41;
12860 GemmMicrokernelTester()
12861 .mr(1)
12862 .nr(4)
12863 .kr(8)
12864 .sr(1)
12865 .m(1)
12866 .n(4)
12867 .k(8)
12868 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012869 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012870 }
12871
12872 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, qmax) {
12873 TEST_REQUIRES_X86_SSE41;
12874 GemmMicrokernelTester()
12875 .mr(1)
12876 .nr(4)
12877 .kr(8)
12878 .sr(1)
12879 .m(1)
12880 .n(4)
12881 .k(8)
12882 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012883 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012884 }
12885
12886 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, strided_cm) {
12887 TEST_REQUIRES_X86_SSE41;
12888 GemmMicrokernelTester()
12889 .mr(1)
12890 .nr(4)
12891 .kr(8)
12892 .sr(1)
12893 .m(1)
12894 .n(4)
12895 .k(8)
12896 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080012897 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012898 }
12899
12900 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, no_a_zero_point) {
12901 TEST_REQUIRES_X86_SSE41;
12902 for (size_t k = 1; k <= 40; k += 9) {
12903 GemmMicrokernelTester()
12904 .mr(1)
12905 .nr(4)
12906 .kr(8)
12907 .sr(1)
12908 .m(1)
12909 .n(4)
12910 .k(k)
12911 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080012912 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012913 }
12914 }
12915
12916 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, no_b_zero_point) {
12917 TEST_REQUIRES_X86_SSE41;
12918 for (size_t k = 1; k <= 40; k += 9) {
12919 GemmMicrokernelTester()
12920 .mr(1)
12921 .nr(4)
12922 .kr(8)
12923 .sr(1)
12924 .m(1)
12925 .n(4)
12926 .k(k)
12927 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080012928 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012929 }
12930 }
12931
12932 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, no_zero_point) {
12933 TEST_REQUIRES_X86_SSE41;
12934 for (size_t k = 1; k <= 40; k += 9) {
12935 GemmMicrokernelTester()
12936 .mr(1)
12937 .nr(4)
12938 .kr(8)
12939 .sr(1)
12940 .m(1)
12941 .n(4)
12942 .k(k)
12943 .a_zero_point(0)
12944 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080012945 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012946 }
12947 }
12948#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
12949
12950
12951#if XNN_ARCH_X86 || XNN_ARCH_X86_64
12952 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8) {
12953 TEST_REQUIRES_X86_SSE41;
12954 GemmMicrokernelTester()
12955 .mr(2)
12956 .nr(4)
12957 .kr(8)
12958 .sr(1)
12959 .m(2)
12960 .n(4)
12961 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080012962 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012963 }
12964
12965 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, strided_cn) {
12966 TEST_REQUIRES_X86_SSE41;
12967 GemmMicrokernelTester()
12968 .mr(2)
12969 .nr(4)
12970 .kr(8)
12971 .sr(1)
12972 .m(2)
12973 .n(4)
12974 .k(8)
12975 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080012976 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012977 }
12978
12979 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_subtile) {
12980 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012981 for (uint32_t n = 1; n <= 4; n++) {
12982 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012983 GemmMicrokernelTester()
12984 .mr(2)
12985 .nr(4)
12986 .kr(8)
12987 .sr(1)
12988 .m(m)
12989 .n(n)
12990 .k(8)
12991 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012992 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070012993 }
12994 }
12995 }
12996
12997 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_subtile_m) {
12998 TEST_REQUIRES_X86_SSE41;
12999 for (uint32_t m = 1; m <= 2; m++) {
13000 GemmMicrokernelTester()
13001 .mr(2)
13002 .nr(4)
13003 .kr(8)
13004 .sr(1)
13005 .m(m)
13006 .n(4)
13007 .k(8)
13008 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013009 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013010 }
13011 }
13012
13013 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_subtile_n) {
13014 TEST_REQUIRES_X86_SSE41;
13015 for (uint32_t n = 1; n <= 4; n++) {
13016 GemmMicrokernelTester()
13017 .mr(2)
13018 .nr(4)
13019 .kr(8)
13020 .sr(1)
13021 .m(2)
13022 .n(n)
13023 .k(8)
13024 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013025 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013026 }
13027 }
13028
13029 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_lt_8) {
13030 TEST_REQUIRES_X86_SSE41;
13031 for (size_t k = 1; k < 8; k++) {
13032 GemmMicrokernelTester()
13033 .mr(2)
13034 .nr(4)
13035 .kr(8)
13036 .sr(1)
13037 .m(2)
13038 .n(4)
13039 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013040 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013041 }
13042 }
13043
13044 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_lt_8_subtile) {
13045 TEST_REQUIRES_X86_SSE41;
13046 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013047 for (uint32_t n = 1; n <= 4; n++) {
13048 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013049 GemmMicrokernelTester()
13050 .mr(2)
13051 .nr(4)
13052 .kr(8)
13053 .sr(1)
13054 .m(m)
13055 .n(n)
13056 .k(k)
13057 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013058 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013059 }
13060 }
13061 }
13062 }
13063
13064 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_gt_8) {
13065 TEST_REQUIRES_X86_SSE41;
13066 for (size_t k = 9; k < 16; k++) {
13067 GemmMicrokernelTester()
13068 .mr(2)
13069 .nr(4)
13070 .kr(8)
13071 .sr(1)
13072 .m(2)
13073 .n(4)
13074 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013075 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013076 }
13077 }
13078
13079 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_gt_8_subtile) {
13080 TEST_REQUIRES_X86_SSE41;
13081 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013082 for (uint32_t n = 1; n <= 4; n++) {
13083 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013084 GemmMicrokernelTester()
13085 .mr(2)
13086 .nr(4)
13087 .kr(8)
13088 .sr(1)
13089 .m(m)
13090 .n(n)
13091 .k(k)
13092 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013093 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013094 }
13095 }
13096 }
13097 }
13098
13099 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_div_8) {
13100 TEST_REQUIRES_X86_SSE41;
13101 for (size_t k = 16; k <= 80; k += 8) {
13102 GemmMicrokernelTester()
13103 .mr(2)
13104 .nr(4)
13105 .kr(8)
13106 .sr(1)
13107 .m(2)
13108 .n(4)
13109 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013110 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013111 }
13112 }
13113
13114 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_div_8_subtile) {
13115 TEST_REQUIRES_X86_SSE41;
13116 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013117 for (uint32_t n = 1; n <= 4; n++) {
13118 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013119 GemmMicrokernelTester()
13120 .mr(2)
13121 .nr(4)
13122 .kr(8)
13123 .sr(1)
13124 .m(m)
13125 .n(n)
13126 .k(k)
13127 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013128 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013129 }
13130 }
13131 }
13132 }
13133
13134 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4) {
13135 TEST_REQUIRES_X86_SSE41;
13136 for (uint32_t n = 5; n < 8; n++) {
13137 for (size_t k = 1; k <= 40; k += 9) {
13138 GemmMicrokernelTester()
13139 .mr(2)
13140 .nr(4)
13141 .kr(8)
13142 .sr(1)
13143 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013144 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013145 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013146 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013147 }
13148 }
13149 }
13150
13151 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4_strided_cn) {
13152 TEST_REQUIRES_X86_SSE41;
13153 for (uint32_t n = 5; n < 8; n++) {
13154 for (size_t k = 1; k <= 40; k += 9) {
13155 GemmMicrokernelTester()
13156 .mr(2)
13157 .nr(4)
13158 .kr(8)
13159 .sr(1)
13160 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013161 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013162 .k(k)
13163 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080013164 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013165 }
13166 }
13167 }
13168
13169 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4_subtile) {
13170 TEST_REQUIRES_X86_SSE41;
13171 for (uint32_t n = 5; n < 8; n++) {
13172 for (size_t k = 1; k <= 40; k += 9) {
13173 for (uint32_t m = 1; m <= 2; m++) {
13174 GemmMicrokernelTester()
13175 .mr(2)
13176 .nr(4)
13177 .kr(8)
13178 .sr(1)
13179 .m(m)
13180 .n(n)
13181 .k(k)
13182 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013183 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013184 }
13185 }
13186 }
13187 }
13188
13189 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4) {
13190 TEST_REQUIRES_X86_SSE41;
13191 for (uint32_t n = 8; n <= 12; n += 4) {
13192 for (size_t k = 1; k <= 40; k += 9) {
13193 GemmMicrokernelTester()
13194 .mr(2)
13195 .nr(4)
13196 .kr(8)
13197 .sr(1)
13198 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013199 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013200 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013201 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013202 }
13203 }
13204 }
13205
13206 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4_strided_cn) {
13207 TEST_REQUIRES_X86_SSE41;
13208 for (uint32_t n = 8; n <= 12; n += 4) {
13209 for (size_t k = 1; k <= 40; k += 9) {
13210 GemmMicrokernelTester()
13211 .mr(2)
13212 .nr(4)
13213 .kr(8)
13214 .sr(1)
13215 .m(2)
13216 .n(n)
13217 .k(k)
13218 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080013219 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013220 }
13221 }
13222 }
13223
13224 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4_subtile) {
13225 TEST_REQUIRES_X86_SSE41;
13226 for (uint32_t n = 8; n <= 12; n += 4) {
13227 for (size_t k = 1; k <= 40; k += 9) {
13228 for (uint32_t m = 1; m <= 2; m++) {
13229 GemmMicrokernelTester()
13230 .mr(2)
13231 .nr(4)
13232 .kr(8)
13233 .sr(1)
13234 .m(m)
13235 .n(n)
13236 .k(k)
13237 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013238 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013239 }
13240 }
13241 }
13242 }
13243
13244 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, small_kernel) {
13245 TEST_REQUIRES_X86_SSE41;
13246 for (size_t k = 1; k <= 40; k += 9) {
13247 GemmMicrokernelTester()
13248 .mr(2)
13249 .nr(4)
13250 .kr(8)
13251 .sr(1)
13252 .m(2)
13253 .n(4)
13254 .k(k)
13255 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013256 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013257 }
13258 }
13259
13260 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, small_kernel_subtile) {
13261 TEST_REQUIRES_X86_SSE41;
13262 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013263 for (uint32_t n = 1; n <= 4; n++) {
13264 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013265 GemmMicrokernelTester()
13266 .mr(2)
13267 .nr(4)
13268 .kr(8)
13269 .sr(1)
13270 .m(m)
13271 .n(n)
13272 .k(k)
13273 .ks(3)
13274 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013275 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013276 }
13277 }
13278 }
13279 }
13280
13281 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4_small_kernel) {
13282 TEST_REQUIRES_X86_SSE41;
13283 for (uint32_t n = 5; n < 8; n++) {
13284 for (size_t k = 1; k <= 40; k += 9) {
13285 GemmMicrokernelTester()
13286 .mr(2)
13287 .nr(4)
13288 .kr(8)
13289 .sr(1)
13290 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013291 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013292 .k(k)
13293 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013294 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013295 }
13296 }
13297 }
13298
13299 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4_small_kernel) {
13300 TEST_REQUIRES_X86_SSE41;
13301 for (uint32_t n = 8; n <= 12; n += 4) {
13302 for (size_t k = 1; k <= 40; k += 9) {
13303 GemmMicrokernelTester()
13304 .mr(2)
13305 .nr(4)
13306 .kr(8)
13307 .sr(1)
13308 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013309 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013310 .k(k)
13311 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013312 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013313 }
13314 }
13315 }
13316
13317 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, strided_cm_subtile) {
13318 TEST_REQUIRES_X86_SSE41;
13319 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013320 for (uint32_t n = 1; n <= 4; n++) {
13321 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013322 GemmMicrokernelTester()
13323 .mr(2)
13324 .nr(4)
13325 .kr(8)
13326 .sr(1)
13327 .m(m)
13328 .n(n)
13329 .k(k)
13330 .cm_stride(7)
13331 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013332 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013333 }
13334 }
13335 }
13336 }
13337
13338 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, a_offset) {
13339 TEST_REQUIRES_X86_SSE41;
13340 for (size_t k = 1; k <= 40; k += 9) {
13341 GemmMicrokernelTester()
13342 .mr(2)
13343 .nr(4)
13344 .kr(8)
13345 .sr(1)
13346 .m(2)
13347 .n(4)
13348 .k(k)
13349 .ks(3)
13350 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080013351 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013352 }
13353 }
13354
13355 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, zero) {
13356 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013357 for (size_t k = 1; k <= 40; k += 9) {
13358 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013359 GemmMicrokernelTester()
13360 .mr(2)
13361 .nr(4)
13362 .kr(8)
13363 .sr(1)
13364 .m(2)
13365 .n(4)
13366 .k(k)
13367 .ks(3)
13368 .a_offset(83)
13369 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080013370 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013371 }
13372 }
13373 }
13374
13375 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, qmin) {
13376 TEST_REQUIRES_X86_SSE41;
13377 GemmMicrokernelTester()
13378 .mr(2)
13379 .nr(4)
13380 .kr(8)
13381 .sr(1)
13382 .m(2)
13383 .n(4)
13384 .k(8)
13385 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013386 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013387 }
13388
13389 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, qmax) {
13390 TEST_REQUIRES_X86_SSE41;
13391 GemmMicrokernelTester()
13392 .mr(2)
13393 .nr(4)
13394 .kr(8)
13395 .sr(1)
13396 .m(2)
13397 .n(4)
13398 .k(8)
13399 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013400 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013401 }
13402
13403 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, strided_cm) {
13404 TEST_REQUIRES_X86_SSE41;
13405 GemmMicrokernelTester()
13406 .mr(2)
13407 .nr(4)
13408 .kr(8)
13409 .sr(1)
13410 .m(2)
13411 .n(4)
13412 .k(8)
13413 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080013414 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013415 }
13416
13417 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, no_a_zero_point) {
13418 TEST_REQUIRES_X86_SSE41;
13419 for (size_t k = 1; k <= 40; k += 9) {
13420 GemmMicrokernelTester()
13421 .mr(2)
13422 .nr(4)
13423 .kr(8)
13424 .sr(1)
13425 .m(2)
13426 .n(4)
13427 .k(k)
13428 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080013429 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013430 }
13431 }
13432
13433 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, no_b_zero_point) {
13434 TEST_REQUIRES_X86_SSE41;
13435 for (size_t k = 1; k <= 40; k += 9) {
13436 GemmMicrokernelTester()
13437 .mr(2)
13438 .nr(4)
13439 .kr(8)
13440 .sr(1)
13441 .m(2)
13442 .n(4)
13443 .k(k)
13444 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080013445 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013446 }
13447 }
13448
13449 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, no_zero_point) {
13450 TEST_REQUIRES_X86_SSE41;
13451 for (size_t k = 1; k <= 40; k += 9) {
13452 GemmMicrokernelTester()
13453 .mr(2)
13454 .nr(4)
13455 .kr(8)
13456 .sr(1)
13457 .m(2)
13458 .n(4)
13459 .k(k)
13460 .a_zero_point(0)
13461 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080013462 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013463 }
13464 }
13465#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13466
13467
13468#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013469 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8) {
13470 TEST_REQUIRES_X86_AVX;
13471 GemmMicrokernelTester()
13472 .mr(2)
13473 .nr(4)
13474 .kr(8)
13475 .sr(1)
13476 .m(2)
13477 .n(4)
13478 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080013479 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013480 }
13481
13482 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cn) {
13483 TEST_REQUIRES_X86_AVX;
13484 GemmMicrokernelTester()
13485 .mr(2)
13486 .nr(4)
13487 .kr(8)
13488 .sr(1)
13489 .m(2)
13490 .n(4)
13491 .k(8)
13492 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080013493 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013494 }
13495
13496 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile) {
13497 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013498 for (uint32_t n = 1; n <= 4; n++) {
13499 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013500 GemmMicrokernelTester()
13501 .mr(2)
13502 .nr(4)
13503 .kr(8)
13504 .sr(1)
13505 .m(m)
13506 .n(n)
13507 .k(8)
13508 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013509 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013510 }
13511 }
13512 }
13513
13514 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile_m) {
13515 TEST_REQUIRES_X86_AVX;
13516 for (uint32_t m = 1; m <= 2; m++) {
13517 GemmMicrokernelTester()
13518 .mr(2)
13519 .nr(4)
13520 .kr(8)
13521 .sr(1)
13522 .m(m)
13523 .n(4)
13524 .k(8)
13525 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013526 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013527 }
13528 }
13529
13530 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile_n) {
13531 TEST_REQUIRES_X86_AVX;
13532 for (uint32_t n = 1; n <= 4; n++) {
13533 GemmMicrokernelTester()
13534 .mr(2)
13535 .nr(4)
13536 .kr(8)
13537 .sr(1)
13538 .m(2)
13539 .n(n)
13540 .k(8)
13541 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013542 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013543 }
13544 }
13545
13546 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8) {
13547 TEST_REQUIRES_X86_AVX;
13548 for (size_t k = 1; k < 8; k++) {
13549 GemmMicrokernelTester()
13550 .mr(2)
13551 .nr(4)
13552 .kr(8)
13553 .sr(1)
13554 .m(2)
13555 .n(4)
13556 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013557 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013558 }
13559 }
13560
13561 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8_subtile) {
13562 TEST_REQUIRES_X86_AVX;
13563 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013564 for (uint32_t n = 1; n <= 4; n++) {
13565 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013566 GemmMicrokernelTester()
13567 .mr(2)
13568 .nr(4)
13569 .kr(8)
13570 .sr(1)
13571 .m(m)
13572 .n(n)
13573 .k(k)
13574 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013575 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013576 }
13577 }
13578 }
13579 }
13580
13581 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8) {
13582 TEST_REQUIRES_X86_AVX;
13583 for (size_t k = 9; k < 16; k++) {
13584 GemmMicrokernelTester()
13585 .mr(2)
13586 .nr(4)
13587 .kr(8)
13588 .sr(1)
13589 .m(2)
13590 .n(4)
13591 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013592 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013593 }
13594 }
13595
13596 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8_subtile) {
13597 TEST_REQUIRES_X86_AVX;
13598 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013599 for (uint32_t n = 1; n <= 4; n++) {
13600 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013601 GemmMicrokernelTester()
13602 .mr(2)
13603 .nr(4)
13604 .kr(8)
13605 .sr(1)
13606 .m(m)
13607 .n(n)
13608 .k(k)
13609 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013610 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013611 }
13612 }
13613 }
13614 }
13615
13616 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8) {
13617 TEST_REQUIRES_X86_AVX;
13618 for (size_t k = 16; k <= 80; k += 8) {
13619 GemmMicrokernelTester()
13620 .mr(2)
13621 .nr(4)
13622 .kr(8)
13623 .sr(1)
13624 .m(2)
13625 .n(4)
13626 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013627 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013628 }
13629 }
13630
13631 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8_subtile) {
13632 TEST_REQUIRES_X86_AVX;
13633 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013634 for (uint32_t n = 1; n <= 4; n++) {
13635 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013636 GemmMicrokernelTester()
13637 .mr(2)
13638 .nr(4)
13639 .kr(8)
13640 .sr(1)
13641 .m(m)
13642 .n(n)
13643 .k(k)
13644 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013645 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013646 }
13647 }
13648 }
13649 }
13650
13651 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4) {
13652 TEST_REQUIRES_X86_AVX;
13653 for (uint32_t n = 5; n < 8; n++) {
13654 for (size_t k = 1; k <= 40; k += 9) {
13655 GemmMicrokernelTester()
13656 .mr(2)
13657 .nr(4)
13658 .kr(8)
13659 .sr(1)
13660 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013661 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013662 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013663 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013664 }
13665 }
13666 }
13667
13668 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_strided_cn) {
13669 TEST_REQUIRES_X86_AVX;
13670 for (uint32_t n = 5; n < 8; n++) {
13671 for (size_t k = 1; k <= 40; k += 9) {
13672 GemmMicrokernelTester()
13673 .mr(2)
13674 .nr(4)
13675 .kr(8)
13676 .sr(1)
13677 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013678 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013679 .k(k)
13680 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080013681 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013682 }
13683 }
13684 }
13685
13686 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_subtile) {
13687 TEST_REQUIRES_X86_AVX;
13688 for (uint32_t n = 5; n < 8; n++) {
13689 for (size_t k = 1; k <= 40; k += 9) {
13690 for (uint32_t m = 1; m <= 2; m++) {
13691 GemmMicrokernelTester()
13692 .mr(2)
13693 .nr(4)
13694 .kr(8)
13695 .sr(1)
13696 .m(m)
13697 .n(n)
13698 .k(k)
13699 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013700 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013701 }
13702 }
13703 }
13704 }
13705
13706 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4) {
13707 TEST_REQUIRES_X86_AVX;
13708 for (uint32_t n = 8; n <= 12; n += 4) {
13709 for (size_t k = 1; k <= 40; k += 9) {
13710 GemmMicrokernelTester()
13711 .mr(2)
13712 .nr(4)
13713 .kr(8)
13714 .sr(1)
13715 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013716 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013717 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013718 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013719 }
13720 }
13721 }
13722
13723 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_strided_cn) {
13724 TEST_REQUIRES_X86_AVX;
13725 for (uint32_t n = 8; n <= 12; n += 4) {
13726 for (size_t k = 1; k <= 40; k += 9) {
13727 GemmMicrokernelTester()
13728 .mr(2)
13729 .nr(4)
13730 .kr(8)
13731 .sr(1)
13732 .m(2)
13733 .n(n)
13734 .k(k)
13735 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080013736 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013737 }
13738 }
13739 }
13740
13741 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_subtile) {
13742 TEST_REQUIRES_X86_AVX;
13743 for (uint32_t n = 8; n <= 12; n += 4) {
13744 for (size_t k = 1; k <= 40; k += 9) {
13745 for (uint32_t m = 1; m <= 2; m++) {
13746 GemmMicrokernelTester()
13747 .mr(2)
13748 .nr(4)
13749 .kr(8)
13750 .sr(1)
13751 .m(m)
13752 .n(n)
13753 .k(k)
13754 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013755 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013756 }
13757 }
13758 }
13759 }
13760
13761 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, small_kernel) {
13762 TEST_REQUIRES_X86_AVX;
13763 for (size_t k = 1; k <= 40; k += 9) {
13764 GemmMicrokernelTester()
13765 .mr(2)
13766 .nr(4)
13767 .kr(8)
13768 .sr(1)
13769 .m(2)
13770 .n(4)
13771 .k(k)
13772 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013773 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013774 }
13775 }
13776
13777 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, small_kernel_subtile) {
13778 TEST_REQUIRES_X86_AVX;
13779 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013780 for (uint32_t n = 1; n <= 4; n++) {
13781 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013782 GemmMicrokernelTester()
13783 .mr(2)
13784 .nr(4)
13785 .kr(8)
13786 .sr(1)
13787 .m(m)
13788 .n(n)
13789 .k(k)
13790 .ks(3)
13791 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013792 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013793 }
13794 }
13795 }
13796 }
13797
13798 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_small_kernel) {
13799 TEST_REQUIRES_X86_AVX;
13800 for (uint32_t n = 5; n < 8; n++) {
13801 for (size_t k = 1; k <= 40; k += 9) {
13802 GemmMicrokernelTester()
13803 .mr(2)
13804 .nr(4)
13805 .kr(8)
13806 .sr(1)
13807 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013808 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013809 .k(k)
13810 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013811 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013812 }
13813 }
13814 }
13815
13816 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_small_kernel) {
13817 TEST_REQUIRES_X86_AVX;
13818 for (uint32_t n = 8; n <= 12; n += 4) {
13819 for (size_t k = 1; k <= 40; k += 9) {
13820 GemmMicrokernelTester()
13821 .mr(2)
13822 .nr(4)
13823 .kr(8)
13824 .sr(1)
13825 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013826 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013827 .k(k)
13828 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080013829 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013830 }
13831 }
13832 }
13833
13834 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cm_subtile) {
13835 TEST_REQUIRES_X86_AVX;
13836 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013837 for (uint32_t n = 1; n <= 4; n++) {
13838 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013839 GemmMicrokernelTester()
13840 .mr(2)
13841 .nr(4)
13842 .kr(8)
13843 .sr(1)
13844 .m(m)
13845 .n(n)
13846 .k(k)
13847 .cm_stride(7)
13848 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013849 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013850 }
13851 }
13852 }
13853 }
13854
13855 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, a_offset) {
13856 TEST_REQUIRES_X86_AVX;
13857 for (size_t k = 1; k <= 40; k += 9) {
13858 GemmMicrokernelTester()
13859 .mr(2)
13860 .nr(4)
13861 .kr(8)
13862 .sr(1)
13863 .m(2)
13864 .n(4)
13865 .k(k)
13866 .ks(3)
13867 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080013868 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013869 }
13870 }
13871
13872 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, zero) {
13873 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013874 for (size_t k = 1; k <= 40; k += 9) {
13875 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013876 GemmMicrokernelTester()
13877 .mr(2)
13878 .nr(4)
13879 .kr(8)
13880 .sr(1)
13881 .m(2)
13882 .n(4)
13883 .k(k)
13884 .ks(3)
13885 .a_offset(83)
13886 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080013887 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013888 }
13889 }
13890 }
13891
13892 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, qmin) {
13893 TEST_REQUIRES_X86_AVX;
13894 GemmMicrokernelTester()
13895 .mr(2)
13896 .nr(4)
13897 .kr(8)
13898 .sr(1)
13899 .m(2)
13900 .n(4)
13901 .k(8)
13902 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013903 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013904 }
13905
13906 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, qmax) {
13907 TEST_REQUIRES_X86_AVX;
13908 GemmMicrokernelTester()
13909 .mr(2)
13910 .nr(4)
13911 .kr(8)
13912 .sr(1)
13913 .m(2)
13914 .n(4)
13915 .k(8)
13916 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013917 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013918 }
13919
13920 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cm) {
13921 TEST_REQUIRES_X86_AVX;
13922 GemmMicrokernelTester()
13923 .mr(2)
13924 .nr(4)
13925 .kr(8)
13926 .sr(1)
13927 .m(2)
13928 .n(4)
13929 .k(8)
13930 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080013931 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013932 }
13933
13934 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, no_a_zero_point) {
13935 TEST_REQUIRES_X86_AVX;
13936 for (size_t k = 1; k <= 40; k += 9) {
13937 GemmMicrokernelTester()
13938 .mr(2)
13939 .nr(4)
13940 .kr(8)
13941 .sr(1)
13942 .m(2)
13943 .n(4)
13944 .k(k)
13945 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080013946 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013947 }
13948 }
13949
13950 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, no_b_zero_point) {
13951 TEST_REQUIRES_X86_AVX;
13952 for (size_t k = 1; k <= 40; k += 9) {
13953 GemmMicrokernelTester()
13954 .mr(2)
13955 .nr(4)
13956 .kr(8)
13957 .sr(1)
13958 .m(2)
13959 .n(4)
13960 .k(k)
13961 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080013962 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013963 }
13964 }
13965
13966 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, no_zero_point) {
13967 TEST_REQUIRES_X86_AVX;
13968 for (size_t k = 1; k <= 40; k += 9) {
13969 GemmMicrokernelTester()
13970 .mr(2)
13971 .nr(4)
13972 .kr(8)
13973 .sr(1)
13974 .m(2)
13975 .n(4)
13976 .k(k)
13977 .a_zero_point(0)
13978 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080013979 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013980 }
13981 }
13982#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
13983
13984
13985#if XNN_ARCH_X86 || XNN_ARCH_X86_64
13986 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8) {
13987 TEST_REQUIRES_X86_AVX;
13988 GemmMicrokernelTester()
13989 .mr(3)
13990 .nr(4)
13991 .kr(8)
13992 .sr(1)
13993 .m(3)
13994 .n(4)
13995 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080013996 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070013997 }
13998
13999 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cn) {
14000 TEST_REQUIRES_X86_AVX;
14001 GemmMicrokernelTester()
14002 .mr(3)
14003 .nr(4)
14004 .kr(8)
14005 .sr(1)
14006 .m(3)
14007 .n(4)
14008 .k(8)
14009 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080014010 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014011 }
14012
14013 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile) {
14014 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014015 for (uint32_t n = 1; n <= 4; n++) {
14016 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014017 GemmMicrokernelTester()
14018 .mr(3)
14019 .nr(4)
14020 .kr(8)
14021 .sr(1)
14022 .m(m)
14023 .n(n)
14024 .k(8)
14025 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014026 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014027 }
14028 }
14029 }
14030
14031 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile_m) {
14032 TEST_REQUIRES_X86_AVX;
14033 for (uint32_t m = 1; m <= 3; m++) {
14034 GemmMicrokernelTester()
14035 .mr(3)
14036 .nr(4)
14037 .kr(8)
14038 .sr(1)
14039 .m(m)
14040 .n(4)
14041 .k(8)
14042 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014043 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014044 }
14045 }
14046
14047 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile_n) {
14048 TEST_REQUIRES_X86_AVX;
14049 for (uint32_t n = 1; n <= 4; n++) {
14050 GemmMicrokernelTester()
14051 .mr(3)
14052 .nr(4)
14053 .kr(8)
14054 .sr(1)
14055 .m(3)
14056 .n(n)
14057 .k(8)
14058 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014059 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014060 }
14061 }
14062
14063 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8) {
14064 TEST_REQUIRES_X86_AVX;
14065 for (size_t k = 1; k < 8; k++) {
14066 GemmMicrokernelTester()
14067 .mr(3)
14068 .nr(4)
14069 .kr(8)
14070 .sr(1)
14071 .m(3)
14072 .n(4)
14073 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014074 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014075 }
14076 }
14077
14078 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8_subtile) {
14079 TEST_REQUIRES_X86_AVX;
14080 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014081 for (uint32_t n = 1; n <= 4; n++) {
14082 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014083 GemmMicrokernelTester()
14084 .mr(3)
14085 .nr(4)
14086 .kr(8)
14087 .sr(1)
14088 .m(m)
14089 .n(n)
14090 .k(k)
14091 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014092 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014093 }
14094 }
14095 }
14096 }
14097
14098 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8) {
14099 TEST_REQUIRES_X86_AVX;
14100 for (size_t k = 9; k < 16; k++) {
14101 GemmMicrokernelTester()
14102 .mr(3)
14103 .nr(4)
14104 .kr(8)
14105 .sr(1)
14106 .m(3)
14107 .n(4)
14108 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014109 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014110 }
14111 }
14112
14113 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8_subtile) {
14114 TEST_REQUIRES_X86_AVX;
14115 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014116 for (uint32_t n = 1; n <= 4; n++) {
14117 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014118 GemmMicrokernelTester()
14119 .mr(3)
14120 .nr(4)
14121 .kr(8)
14122 .sr(1)
14123 .m(m)
14124 .n(n)
14125 .k(k)
14126 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014127 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014128 }
14129 }
14130 }
14131 }
14132
14133 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8) {
14134 TEST_REQUIRES_X86_AVX;
14135 for (size_t k = 16; k <= 80; k += 8) {
14136 GemmMicrokernelTester()
14137 .mr(3)
14138 .nr(4)
14139 .kr(8)
14140 .sr(1)
14141 .m(3)
14142 .n(4)
14143 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014144 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014145 }
14146 }
14147
14148 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8_subtile) {
14149 TEST_REQUIRES_X86_AVX;
14150 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014151 for (uint32_t n = 1; n <= 4; n++) {
14152 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014153 GemmMicrokernelTester()
14154 .mr(3)
14155 .nr(4)
14156 .kr(8)
14157 .sr(1)
14158 .m(m)
14159 .n(n)
14160 .k(k)
14161 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014162 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014163 }
14164 }
14165 }
14166 }
14167
14168 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4) {
14169 TEST_REQUIRES_X86_AVX;
14170 for (uint32_t n = 5; n < 8; n++) {
14171 for (size_t k = 1; k <= 40; k += 9) {
14172 GemmMicrokernelTester()
14173 .mr(3)
14174 .nr(4)
14175 .kr(8)
14176 .sr(1)
14177 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014178 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014179 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014180 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014181 }
14182 }
14183 }
14184
14185 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_strided_cn) {
14186 TEST_REQUIRES_X86_AVX;
14187 for (uint32_t n = 5; n < 8; n++) {
14188 for (size_t k = 1; k <= 40; k += 9) {
14189 GemmMicrokernelTester()
14190 .mr(3)
14191 .nr(4)
14192 .kr(8)
14193 .sr(1)
14194 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014195 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014196 .k(k)
14197 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080014198 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014199 }
14200 }
14201 }
14202
14203 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_subtile) {
14204 TEST_REQUIRES_X86_AVX;
14205 for (uint32_t n = 5; n < 8; n++) {
14206 for (size_t k = 1; k <= 40; k += 9) {
14207 for (uint32_t m = 1; m <= 3; m++) {
14208 GemmMicrokernelTester()
14209 .mr(3)
14210 .nr(4)
14211 .kr(8)
14212 .sr(1)
14213 .m(m)
14214 .n(n)
14215 .k(k)
14216 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014217 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014218 }
14219 }
14220 }
14221 }
14222
14223 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4) {
14224 TEST_REQUIRES_X86_AVX;
14225 for (uint32_t n = 8; n <= 12; n += 4) {
14226 for (size_t k = 1; k <= 40; k += 9) {
14227 GemmMicrokernelTester()
14228 .mr(3)
14229 .nr(4)
14230 .kr(8)
14231 .sr(1)
14232 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014233 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014234 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014235 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014236 }
14237 }
14238 }
14239
14240 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_strided_cn) {
14241 TEST_REQUIRES_X86_AVX;
14242 for (uint32_t n = 8; n <= 12; n += 4) {
14243 for (size_t k = 1; k <= 40; k += 9) {
14244 GemmMicrokernelTester()
14245 .mr(3)
14246 .nr(4)
14247 .kr(8)
14248 .sr(1)
14249 .m(3)
14250 .n(n)
14251 .k(k)
14252 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080014253 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014254 }
14255 }
14256 }
14257
14258 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_subtile) {
14259 TEST_REQUIRES_X86_AVX;
14260 for (uint32_t n = 8; n <= 12; n += 4) {
14261 for (size_t k = 1; k <= 40; k += 9) {
14262 for (uint32_t m = 1; m <= 3; m++) {
14263 GemmMicrokernelTester()
14264 .mr(3)
14265 .nr(4)
14266 .kr(8)
14267 .sr(1)
14268 .m(m)
14269 .n(n)
14270 .k(k)
14271 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014272 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014273 }
14274 }
14275 }
14276 }
14277
14278 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, small_kernel) {
14279 TEST_REQUIRES_X86_AVX;
14280 for (size_t k = 1; k <= 40; k += 9) {
14281 GemmMicrokernelTester()
14282 .mr(3)
14283 .nr(4)
14284 .kr(8)
14285 .sr(1)
14286 .m(3)
14287 .n(4)
14288 .k(k)
14289 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080014290 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014291 }
14292 }
14293
14294 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, small_kernel_subtile) {
14295 TEST_REQUIRES_X86_AVX;
14296 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014297 for (uint32_t n = 1; n <= 4; n++) {
14298 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014299 GemmMicrokernelTester()
14300 .mr(3)
14301 .nr(4)
14302 .kr(8)
14303 .sr(1)
14304 .m(m)
14305 .n(n)
14306 .k(k)
14307 .ks(3)
14308 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014309 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014310 }
14311 }
14312 }
14313 }
14314
14315 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_small_kernel) {
14316 TEST_REQUIRES_X86_AVX;
14317 for (uint32_t n = 5; n < 8; n++) {
14318 for (size_t k = 1; k <= 40; k += 9) {
14319 GemmMicrokernelTester()
14320 .mr(3)
14321 .nr(4)
14322 .kr(8)
14323 .sr(1)
14324 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014325 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014326 .k(k)
14327 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080014328 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014329 }
14330 }
14331 }
14332
14333 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_small_kernel) {
14334 TEST_REQUIRES_X86_AVX;
14335 for (uint32_t n = 8; n <= 12; n += 4) {
14336 for (size_t k = 1; k <= 40; k += 9) {
14337 GemmMicrokernelTester()
14338 .mr(3)
14339 .nr(4)
14340 .kr(8)
14341 .sr(1)
14342 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014343 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014344 .k(k)
14345 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080014346 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014347 }
14348 }
14349 }
14350
14351 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cm_subtile) {
14352 TEST_REQUIRES_X86_AVX;
14353 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014354 for (uint32_t n = 1; n <= 4; n++) {
14355 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014356 GemmMicrokernelTester()
14357 .mr(3)
14358 .nr(4)
14359 .kr(8)
14360 .sr(1)
14361 .m(m)
14362 .n(n)
14363 .k(k)
14364 .cm_stride(7)
14365 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014366 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014367 }
14368 }
14369 }
14370 }
14371
14372 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, a_offset) {
14373 TEST_REQUIRES_X86_AVX;
14374 for (size_t k = 1; k <= 40; k += 9) {
14375 GemmMicrokernelTester()
14376 .mr(3)
14377 .nr(4)
14378 .kr(8)
14379 .sr(1)
14380 .m(3)
14381 .n(4)
14382 .k(k)
14383 .ks(3)
14384 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080014385 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014386 }
14387 }
14388
14389 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, zero) {
14390 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014391 for (size_t k = 1; k <= 40; k += 9) {
14392 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014393 GemmMicrokernelTester()
14394 .mr(3)
14395 .nr(4)
14396 .kr(8)
14397 .sr(1)
14398 .m(3)
14399 .n(4)
14400 .k(k)
14401 .ks(3)
14402 .a_offset(127)
14403 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080014404 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014405 }
14406 }
14407 }
14408
14409 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, qmin) {
14410 TEST_REQUIRES_X86_AVX;
14411 GemmMicrokernelTester()
14412 .mr(3)
14413 .nr(4)
14414 .kr(8)
14415 .sr(1)
14416 .m(3)
14417 .n(4)
14418 .k(8)
14419 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014420 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014421 }
14422
14423 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, qmax) {
14424 TEST_REQUIRES_X86_AVX;
14425 GemmMicrokernelTester()
14426 .mr(3)
14427 .nr(4)
14428 .kr(8)
14429 .sr(1)
14430 .m(3)
14431 .n(4)
14432 .k(8)
14433 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014434 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014435 }
14436
14437 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cm) {
14438 TEST_REQUIRES_X86_AVX;
14439 GemmMicrokernelTester()
14440 .mr(3)
14441 .nr(4)
14442 .kr(8)
14443 .sr(1)
14444 .m(3)
14445 .n(4)
14446 .k(8)
14447 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080014448 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014449 }
14450
14451 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, no_a_zero_point) {
14452 TEST_REQUIRES_X86_AVX;
14453 for (size_t k = 1; k <= 40; k += 9) {
14454 GemmMicrokernelTester()
14455 .mr(3)
14456 .nr(4)
14457 .kr(8)
14458 .sr(1)
14459 .m(3)
14460 .n(4)
14461 .k(k)
14462 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080014463 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014464 }
14465 }
14466
14467 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, no_b_zero_point) {
14468 TEST_REQUIRES_X86_AVX;
14469 for (size_t k = 1; k <= 40; k += 9) {
14470 GemmMicrokernelTester()
14471 .mr(3)
14472 .nr(4)
14473 .kr(8)
14474 .sr(1)
14475 .m(3)
14476 .n(4)
14477 .k(k)
14478 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080014479 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014480 }
14481 }
14482
14483 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, no_zero_point) {
14484 TEST_REQUIRES_X86_AVX;
14485 for (size_t k = 1; k <= 40; k += 9) {
14486 GemmMicrokernelTester()
14487 .mr(3)
14488 .nr(4)
14489 .kr(8)
14490 .sr(1)
14491 .m(3)
14492 .n(4)
14493 .k(k)
14494 .a_zero_point(0)
14495 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080014496 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014497 }
14498 }
14499#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
14500
14501
14502#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014503 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8) {
14504 TEST_REQUIRES_X86_XOP;
14505 GemmMicrokernelTester()
14506 .mr(2)
14507 .nr(4)
14508 .kr(8)
14509 .sr(1)
14510 .m(2)
14511 .n(4)
14512 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080014513 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014514 }
14515
14516 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cn) {
14517 TEST_REQUIRES_X86_XOP;
14518 GemmMicrokernelTester()
14519 .mr(2)
14520 .nr(4)
14521 .kr(8)
14522 .sr(1)
14523 .m(2)
14524 .n(4)
14525 .k(8)
14526 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080014527 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014528 }
14529
14530 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile) {
14531 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014532 for (uint32_t n = 1; n <= 4; n++) {
14533 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014534 GemmMicrokernelTester()
14535 .mr(2)
14536 .nr(4)
14537 .kr(8)
14538 .sr(1)
14539 .m(m)
14540 .n(n)
14541 .k(8)
14542 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014543 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014544 }
14545 }
14546 }
14547
14548 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile_m) {
14549 TEST_REQUIRES_X86_XOP;
14550 for (uint32_t m = 1; m <= 2; m++) {
14551 GemmMicrokernelTester()
14552 .mr(2)
14553 .nr(4)
14554 .kr(8)
14555 .sr(1)
14556 .m(m)
14557 .n(4)
14558 .k(8)
14559 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014560 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014561 }
14562 }
14563
14564 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile_n) {
14565 TEST_REQUIRES_X86_XOP;
14566 for (uint32_t n = 1; n <= 4; n++) {
14567 GemmMicrokernelTester()
14568 .mr(2)
14569 .nr(4)
14570 .kr(8)
14571 .sr(1)
14572 .m(2)
14573 .n(n)
14574 .k(8)
14575 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014576 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014577 }
14578 }
14579
14580 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8) {
14581 TEST_REQUIRES_X86_XOP;
14582 for (size_t k = 1; k < 8; k++) {
14583 GemmMicrokernelTester()
14584 .mr(2)
14585 .nr(4)
14586 .kr(8)
14587 .sr(1)
14588 .m(2)
14589 .n(4)
14590 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014591 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014592 }
14593 }
14594
14595 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8_subtile) {
14596 TEST_REQUIRES_X86_XOP;
14597 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014598 for (uint32_t n = 1; n <= 4; n++) {
14599 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014600 GemmMicrokernelTester()
14601 .mr(2)
14602 .nr(4)
14603 .kr(8)
14604 .sr(1)
14605 .m(m)
14606 .n(n)
14607 .k(k)
14608 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014609 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014610 }
14611 }
14612 }
14613 }
14614
14615 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8) {
14616 TEST_REQUIRES_X86_XOP;
14617 for (size_t k = 9; k < 16; k++) {
14618 GemmMicrokernelTester()
14619 .mr(2)
14620 .nr(4)
14621 .kr(8)
14622 .sr(1)
14623 .m(2)
14624 .n(4)
14625 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014626 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014627 }
14628 }
14629
14630 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8_subtile) {
14631 TEST_REQUIRES_X86_XOP;
14632 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014633 for (uint32_t n = 1; n <= 4; n++) {
14634 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014635 GemmMicrokernelTester()
14636 .mr(2)
14637 .nr(4)
14638 .kr(8)
14639 .sr(1)
14640 .m(m)
14641 .n(n)
14642 .k(k)
14643 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014644 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014645 }
14646 }
14647 }
14648 }
14649
14650 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8) {
14651 TEST_REQUIRES_X86_XOP;
14652 for (size_t k = 16; k <= 80; k += 8) {
14653 GemmMicrokernelTester()
14654 .mr(2)
14655 .nr(4)
14656 .kr(8)
14657 .sr(1)
14658 .m(2)
14659 .n(4)
14660 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014661 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014662 }
14663 }
14664
14665 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8_subtile) {
14666 TEST_REQUIRES_X86_XOP;
14667 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014668 for (uint32_t n = 1; n <= 4; n++) {
14669 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014670 GemmMicrokernelTester()
14671 .mr(2)
14672 .nr(4)
14673 .kr(8)
14674 .sr(1)
14675 .m(m)
14676 .n(n)
14677 .k(k)
14678 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014679 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014680 }
14681 }
14682 }
14683 }
14684
14685 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4) {
14686 TEST_REQUIRES_X86_XOP;
14687 for (uint32_t n = 5; n < 8; n++) {
14688 for (size_t k = 1; k <= 40; k += 9) {
14689 GemmMicrokernelTester()
14690 .mr(2)
14691 .nr(4)
14692 .kr(8)
14693 .sr(1)
14694 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014695 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014696 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014697 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014698 }
14699 }
14700 }
14701
14702 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_strided_cn) {
14703 TEST_REQUIRES_X86_XOP;
14704 for (uint32_t n = 5; n < 8; n++) {
14705 for (size_t k = 1; k <= 40; k += 9) {
14706 GemmMicrokernelTester()
14707 .mr(2)
14708 .nr(4)
14709 .kr(8)
14710 .sr(1)
14711 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014712 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014713 .k(k)
14714 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080014715 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014716 }
14717 }
14718 }
14719
14720 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_subtile) {
14721 TEST_REQUIRES_X86_XOP;
14722 for (uint32_t n = 5; n < 8; n++) {
14723 for (size_t k = 1; k <= 40; k += 9) {
14724 for (uint32_t m = 1; m <= 2; m++) {
14725 GemmMicrokernelTester()
14726 .mr(2)
14727 .nr(4)
14728 .kr(8)
14729 .sr(1)
14730 .m(m)
14731 .n(n)
14732 .k(k)
14733 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014734 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014735 }
14736 }
14737 }
14738 }
14739
14740 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4) {
14741 TEST_REQUIRES_X86_XOP;
14742 for (uint32_t n = 8; n <= 12; n += 4) {
14743 for (size_t k = 1; k <= 40; k += 9) {
14744 GemmMicrokernelTester()
14745 .mr(2)
14746 .nr(4)
14747 .kr(8)
14748 .sr(1)
14749 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014750 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014751 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014752 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014753 }
14754 }
14755 }
14756
14757 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_strided_cn) {
14758 TEST_REQUIRES_X86_XOP;
14759 for (uint32_t n = 8; n <= 12; n += 4) {
14760 for (size_t k = 1; k <= 40; k += 9) {
14761 GemmMicrokernelTester()
14762 .mr(2)
14763 .nr(4)
14764 .kr(8)
14765 .sr(1)
14766 .m(2)
14767 .n(n)
14768 .k(k)
14769 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080014770 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014771 }
14772 }
14773 }
14774
14775 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_subtile) {
14776 TEST_REQUIRES_X86_XOP;
14777 for (uint32_t n = 8; n <= 12; n += 4) {
14778 for (size_t k = 1; k <= 40; k += 9) {
14779 for (uint32_t m = 1; m <= 2; m++) {
14780 GemmMicrokernelTester()
14781 .mr(2)
14782 .nr(4)
14783 .kr(8)
14784 .sr(1)
14785 .m(m)
14786 .n(n)
14787 .k(k)
14788 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014789 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014790 }
14791 }
14792 }
14793 }
14794
14795 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, small_kernel) {
14796 TEST_REQUIRES_X86_XOP;
14797 for (size_t k = 1; k <= 40; k += 9) {
14798 GemmMicrokernelTester()
14799 .mr(2)
14800 .nr(4)
14801 .kr(8)
14802 .sr(1)
14803 .m(2)
14804 .n(4)
14805 .k(k)
14806 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080014807 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014808 }
14809 }
14810
14811 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, small_kernel_subtile) {
14812 TEST_REQUIRES_X86_XOP;
14813 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014814 for (uint32_t n = 1; n <= 4; n++) {
14815 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014816 GemmMicrokernelTester()
14817 .mr(2)
14818 .nr(4)
14819 .kr(8)
14820 .sr(1)
14821 .m(m)
14822 .n(n)
14823 .k(k)
14824 .ks(3)
14825 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014826 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014827 }
14828 }
14829 }
14830 }
14831
14832 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_small_kernel) {
14833 TEST_REQUIRES_X86_XOP;
14834 for (uint32_t n = 5; n < 8; n++) {
14835 for (size_t k = 1; k <= 40; k += 9) {
14836 GemmMicrokernelTester()
14837 .mr(2)
14838 .nr(4)
14839 .kr(8)
14840 .sr(1)
14841 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014842 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014843 .k(k)
14844 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080014845 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014846 }
14847 }
14848 }
14849
14850 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_small_kernel) {
14851 TEST_REQUIRES_X86_XOP;
14852 for (uint32_t n = 8; n <= 12; n += 4) {
14853 for (size_t k = 1; k <= 40; k += 9) {
14854 GemmMicrokernelTester()
14855 .mr(2)
14856 .nr(4)
14857 .kr(8)
14858 .sr(1)
14859 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014860 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014861 .k(k)
14862 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080014863 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014864 }
14865 }
14866 }
14867
14868 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cm_subtile) {
14869 TEST_REQUIRES_X86_XOP;
14870 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014871 for (uint32_t n = 1; n <= 4; n++) {
14872 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014873 GemmMicrokernelTester()
14874 .mr(2)
14875 .nr(4)
14876 .kr(8)
14877 .sr(1)
14878 .m(m)
14879 .n(n)
14880 .k(k)
14881 .cm_stride(7)
14882 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014883 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014884 }
14885 }
14886 }
14887 }
14888
14889 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, a_offset) {
14890 TEST_REQUIRES_X86_XOP;
14891 for (size_t k = 1; k <= 40; k += 9) {
14892 GemmMicrokernelTester()
14893 .mr(2)
14894 .nr(4)
14895 .kr(8)
14896 .sr(1)
14897 .m(2)
14898 .n(4)
14899 .k(k)
14900 .ks(3)
14901 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080014902 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014903 }
14904 }
14905
14906 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, zero) {
14907 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014908 for (size_t k = 1; k <= 40; k += 9) {
14909 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014910 GemmMicrokernelTester()
14911 .mr(2)
14912 .nr(4)
14913 .kr(8)
14914 .sr(1)
14915 .m(2)
14916 .n(4)
14917 .k(k)
14918 .ks(3)
14919 .a_offset(83)
14920 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080014921 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014922 }
14923 }
14924 }
14925
14926 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, qmin) {
14927 TEST_REQUIRES_X86_XOP;
14928 GemmMicrokernelTester()
14929 .mr(2)
14930 .nr(4)
14931 .kr(8)
14932 .sr(1)
14933 .m(2)
14934 .n(4)
14935 .k(8)
14936 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014937 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014938 }
14939
14940 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, qmax) {
14941 TEST_REQUIRES_X86_XOP;
14942 GemmMicrokernelTester()
14943 .mr(2)
14944 .nr(4)
14945 .kr(8)
14946 .sr(1)
14947 .m(2)
14948 .n(4)
14949 .k(8)
14950 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014951 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014952 }
14953
14954 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cm) {
14955 TEST_REQUIRES_X86_XOP;
14956 GemmMicrokernelTester()
14957 .mr(2)
14958 .nr(4)
14959 .kr(8)
14960 .sr(1)
14961 .m(2)
14962 .n(4)
14963 .k(8)
14964 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080014965 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014966 }
14967
14968 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, no_a_zero_point) {
14969 TEST_REQUIRES_X86_XOP;
14970 for (size_t k = 1; k <= 40; k += 9) {
14971 GemmMicrokernelTester()
14972 .mr(2)
14973 .nr(4)
14974 .kr(8)
14975 .sr(1)
14976 .m(2)
14977 .n(4)
14978 .k(k)
14979 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080014980 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014981 }
14982 }
14983
14984 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, no_b_zero_point) {
14985 TEST_REQUIRES_X86_XOP;
14986 for (size_t k = 1; k <= 40; k += 9) {
14987 GemmMicrokernelTester()
14988 .mr(2)
14989 .nr(4)
14990 .kr(8)
14991 .sr(1)
14992 .m(2)
14993 .n(4)
14994 .k(k)
14995 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080014996 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070014997 }
14998 }
14999
15000 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, no_zero_point) {
15001 TEST_REQUIRES_X86_XOP;
15002 for (size_t k = 1; k <= 40; k += 9) {
15003 GemmMicrokernelTester()
15004 .mr(2)
15005 .nr(4)
15006 .kr(8)
15007 .sr(1)
15008 .m(2)
15009 .n(4)
15010 .k(k)
15011 .a_zero_point(0)
15012 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080015013 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015014 }
15015 }
15016#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15017
15018
15019#if XNN_ARCH_X86 || XNN_ARCH_X86_64
15020 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8) {
15021 TEST_REQUIRES_X86_XOP;
15022 GemmMicrokernelTester()
15023 .mr(3)
15024 .nr(4)
15025 .kr(8)
15026 .sr(1)
15027 .m(3)
15028 .n(4)
15029 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080015030 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015031 }
15032
15033 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cn) {
15034 TEST_REQUIRES_X86_XOP;
15035 GemmMicrokernelTester()
15036 .mr(3)
15037 .nr(4)
15038 .kr(8)
15039 .sr(1)
15040 .m(3)
15041 .n(4)
15042 .k(8)
15043 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015044 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015045 }
15046
15047 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile) {
15048 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015049 for (uint32_t n = 1; n <= 4; n++) {
15050 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015051 GemmMicrokernelTester()
15052 .mr(3)
15053 .nr(4)
15054 .kr(8)
15055 .sr(1)
15056 .m(m)
15057 .n(n)
15058 .k(8)
15059 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015060 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015061 }
15062 }
15063 }
15064
15065 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile_m) {
15066 TEST_REQUIRES_X86_XOP;
15067 for (uint32_t m = 1; m <= 3; m++) {
15068 GemmMicrokernelTester()
15069 .mr(3)
15070 .nr(4)
15071 .kr(8)
15072 .sr(1)
15073 .m(m)
15074 .n(4)
15075 .k(8)
15076 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015077 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015078 }
15079 }
15080
15081 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile_n) {
15082 TEST_REQUIRES_X86_XOP;
15083 for (uint32_t n = 1; n <= 4; n++) {
15084 GemmMicrokernelTester()
15085 .mr(3)
15086 .nr(4)
15087 .kr(8)
15088 .sr(1)
15089 .m(3)
15090 .n(n)
15091 .k(8)
15092 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015093 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015094 }
15095 }
15096
15097 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8) {
15098 TEST_REQUIRES_X86_XOP;
15099 for (size_t k = 1; k < 8; k++) {
15100 GemmMicrokernelTester()
15101 .mr(3)
15102 .nr(4)
15103 .kr(8)
15104 .sr(1)
15105 .m(3)
15106 .n(4)
15107 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015108 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015109 }
15110 }
15111
15112 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8_subtile) {
15113 TEST_REQUIRES_X86_XOP;
15114 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015115 for (uint32_t n = 1; n <= 4; n++) {
15116 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015117 GemmMicrokernelTester()
15118 .mr(3)
15119 .nr(4)
15120 .kr(8)
15121 .sr(1)
15122 .m(m)
15123 .n(n)
15124 .k(k)
15125 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015126 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015127 }
15128 }
15129 }
15130 }
15131
15132 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8) {
15133 TEST_REQUIRES_X86_XOP;
15134 for (size_t k = 9; k < 16; k++) {
15135 GemmMicrokernelTester()
15136 .mr(3)
15137 .nr(4)
15138 .kr(8)
15139 .sr(1)
15140 .m(3)
15141 .n(4)
15142 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015143 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015144 }
15145 }
15146
15147 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8_subtile) {
15148 TEST_REQUIRES_X86_XOP;
15149 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015150 for (uint32_t n = 1; n <= 4; n++) {
15151 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015152 GemmMicrokernelTester()
15153 .mr(3)
15154 .nr(4)
15155 .kr(8)
15156 .sr(1)
15157 .m(m)
15158 .n(n)
15159 .k(k)
15160 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015161 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015162 }
15163 }
15164 }
15165 }
15166
15167 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8) {
15168 TEST_REQUIRES_X86_XOP;
15169 for (size_t k = 16; k <= 80; k += 8) {
15170 GemmMicrokernelTester()
15171 .mr(3)
15172 .nr(4)
15173 .kr(8)
15174 .sr(1)
15175 .m(3)
15176 .n(4)
15177 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015178 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015179 }
15180 }
15181
15182 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8_subtile) {
15183 TEST_REQUIRES_X86_XOP;
15184 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015185 for (uint32_t n = 1; n <= 4; n++) {
15186 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015187 GemmMicrokernelTester()
15188 .mr(3)
15189 .nr(4)
15190 .kr(8)
15191 .sr(1)
15192 .m(m)
15193 .n(n)
15194 .k(k)
15195 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015196 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015197 }
15198 }
15199 }
15200 }
15201
15202 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4) {
15203 TEST_REQUIRES_X86_XOP;
15204 for (uint32_t n = 5; n < 8; n++) {
15205 for (size_t k = 1; k <= 40; k += 9) {
15206 GemmMicrokernelTester()
15207 .mr(3)
15208 .nr(4)
15209 .kr(8)
15210 .sr(1)
15211 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015212 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015213 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015214 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015215 }
15216 }
15217 }
15218
15219 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_strided_cn) {
15220 TEST_REQUIRES_X86_XOP;
15221 for (uint32_t n = 5; n < 8; n++) {
15222 for (size_t k = 1; k <= 40; k += 9) {
15223 GemmMicrokernelTester()
15224 .mr(3)
15225 .nr(4)
15226 .kr(8)
15227 .sr(1)
15228 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015229 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015230 .k(k)
15231 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015232 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015233 }
15234 }
15235 }
15236
15237 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_subtile) {
15238 TEST_REQUIRES_X86_XOP;
15239 for (uint32_t n = 5; n < 8; n++) {
15240 for (size_t k = 1; k <= 40; k += 9) {
15241 for (uint32_t m = 1; m <= 3; m++) {
15242 GemmMicrokernelTester()
15243 .mr(3)
15244 .nr(4)
15245 .kr(8)
15246 .sr(1)
15247 .m(m)
15248 .n(n)
15249 .k(k)
15250 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015251 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015252 }
15253 }
15254 }
15255 }
15256
15257 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4) {
15258 TEST_REQUIRES_X86_XOP;
15259 for (uint32_t n = 8; n <= 12; n += 4) {
15260 for (size_t k = 1; k <= 40; k += 9) {
15261 GemmMicrokernelTester()
15262 .mr(3)
15263 .nr(4)
15264 .kr(8)
15265 .sr(1)
15266 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015267 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015268 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015269 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015270 }
15271 }
15272 }
15273
15274 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_strided_cn) {
15275 TEST_REQUIRES_X86_XOP;
15276 for (uint32_t n = 8; n <= 12; n += 4) {
15277 for (size_t k = 1; k <= 40; k += 9) {
15278 GemmMicrokernelTester()
15279 .mr(3)
15280 .nr(4)
15281 .kr(8)
15282 .sr(1)
15283 .m(3)
15284 .n(n)
15285 .k(k)
15286 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015287 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015288 }
15289 }
15290 }
15291
15292 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_subtile) {
15293 TEST_REQUIRES_X86_XOP;
15294 for (uint32_t n = 8; n <= 12; n += 4) {
15295 for (size_t k = 1; k <= 40; k += 9) {
15296 for (uint32_t m = 1; m <= 3; m++) {
15297 GemmMicrokernelTester()
15298 .mr(3)
15299 .nr(4)
15300 .kr(8)
15301 .sr(1)
15302 .m(m)
15303 .n(n)
15304 .k(k)
15305 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015306 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015307 }
15308 }
15309 }
15310 }
15311
15312 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, small_kernel) {
15313 TEST_REQUIRES_X86_XOP;
15314 for (size_t k = 1; k <= 40; k += 9) {
15315 GemmMicrokernelTester()
15316 .mr(3)
15317 .nr(4)
15318 .kr(8)
15319 .sr(1)
15320 .m(3)
15321 .n(4)
15322 .k(k)
15323 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015324 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015325 }
15326 }
15327
15328 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, small_kernel_subtile) {
15329 TEST_REQUIRES_X86_XOP;
15330 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015331 for (uint32_t n = 1; n <= 4; n++) {
15332 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015333 GemmMicrokernelTester()
15334 .mr(3)
15335 .nr(4)
15336 .kr(8)
15337 .sr(1)
15338 .m(m)
15339 .n(n)
15340 .k(k)
15341 .ks(3)
15342 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015343 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015344 }
15345 }
15346 }
15347 }
15348
15349 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_small_kernel) {
15350 TEST_REQUIRES_X86_XOP;
15351 for (uint32_t n = 5; n < 8; n++) {
15352 for (size_t k = 1; k <= 40; k += 9) {
15353 GemmMicrokernelTester()
15354 .mr(3)
15355 .nr(4)
15356 .kr(8)
15357 .sr(1)
15358 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015359 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015360 .k(k)
15361 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015362 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015363 }
15364 }
15365 }
15366
15367 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_small_kernel) {
15368 TEST_REQUIRES_X86_XOP;
15369 for (uint32_t n = 8; n <= 12; n += 4) {
15370 for (size_t k = 1; k <= 40; k += 9) {
15371 GemmMicrokernelTester()
15372 .mr(3)
15373 .nr(4)
15374 .kr(8)
15375 .sr(1)
15376 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015377 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015378 .k(k)
15379 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015380 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015381 }
15382 }
15383 }
15384
15385 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cm_subtile) {
15386 TEST_REQUIRES_X86_XOP;
15387 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015388 for (uint32_t n = 1; n <= 4; n++) {
15389 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015390 GemmMicrokernelTester()
15391 .mr(3)
15392 .nr(4)
15393 .kr(8)
15394 .sr(1)
15395 .m(m)
15396 .n(n)
15397 .k(k)
15398 .cm_stride(7)
15399 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015400 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015401 }
15402 }
15403 }
15404 }
15405
15406 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, a_offset) {
15407 TEST_REQUIRES_X86_XOP;
15408 for (size_t k = 1; k <= 40; k += 9) {
15409 GemmMicrokernelTester()
15410 .mr(3)
15411 .nr(4)
15412 .kr(8)
15413 .sr(1)
15414 .m(3)
15415 .n(4)
15416 .k(k)
15417 .ks(3)
15418 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080015419 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015420 }
15421 }
15422
15423 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, zero) {
15424 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015425 for (size_t k = 1; k <= 40; k += 9) {
15426 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015427 GemmMicrokernelTester()
15428 .mr(3)
15429 .nr(4)
15430 .kr(8)
15431 .sr(1)
15432 .m(3)
15433 .n(4)
15434 .k(k)
15435 .ks(3)
15436 .a_offset(127)
15437 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080015438 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015439 }
15440 }
15441 }
15442
15443 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, qmin) {
15444 TEST_REQUIRES_X86_XOP;
15445 GemmMicrokernelTester()
15446 .mr(3)
15447 .nr(4)
15448 .kr(8)
15449 .sr(1)
15450 .m(3)
15451 .n(4)
15452 .k(8)
15453 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015454 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015455 }
15456
15457 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, qmax) {
15458 TEST_REQUIRES_X86_XOP;
15459 GemmMicrokernelTester()
15460 .mr(3)
15461 .nr(4)
15462 .kr(8)
15463 .sr(1)
15464 .m(3)
15465 .n(4)
15466 .k(8)
15467 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015468 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015469 }
15470
15471 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cm) {
15472 TEST_REQUIRES_X86_XOP;
15473 GemmMicrokernelTester()
15474 .mr(3)
15475 .nr(4)
15476 .kr(8)
15477 .sr(1)
15478 .m(3)
15479 .n(4)
15480 .k(8)
15481 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015482 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015483 }
15484
15485 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, no_a_zero_point) {
15486 TEST_REQUIRES_X86_XOP;
15487 for (size_t k = 1; k <= 40; k += 9) {
15488 GemmMicrokernelTester()
15489 .mr(3)
15490 .nr(4)
15491 .kr(8)
15492 .sr(1)
15493 .m(3)
15494 .n(4)
15495 .k(k)
15496 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080015497 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015498 }
15499 }
15500
15501 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, no_b_zero_point) {
15502 TEST_REQUIRES_X86_XOP;
15503 for (size_t k = 1; k <= 40; k += 9) {
15504 GemmMicrokernelTester()
15505 .mr(3)
15506 .nr(4)
15507 .kr(8)
15508 .sr(1)
15509 .m(3)
15510 .n(4)
15511 .k(k)
15512 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080015513 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015514 }
15515 }
15516
15517 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, no_zero_point) {
15518 TEST_REQUIRES_X86_XOP;
15519 for (size_t k = 1; k <= 40; k += 9) {
15520 GemmMicrokernelTester()
15521 .mr(3)
15522 .nr(4)
15523 .kr(8)
15524 .sr(1)
15525 .m(3)
15526 .n(4)
15527 .k(k)
15528 .a_zero_point(0)
15529 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080015530 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015531 }
15532 }
15533#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
15534
15535
15536#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015537 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8) {
15538 TEST_REQUIRES_X86_SSE2;
15539 GemmMicrokernelTester()
15540 .mr(3)
15541 .nr(4)
15542 .kr(8)
15543 .sr(1)
15544 .m(3)
15545 .n(4)
15546 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080015547 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015548 }
15549
15550 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, strided_cn) {
15551 TEST_REQUIRES_X86_SSE2;
15552 GemmMicrokernelTester()
15553 .mr(3)
15554 .nr(4)
15555 .kr(8)
15556 .sr(1)
15557 .m(3)
15558 .n(4)
15559 .k(8)
15560 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015561 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015562 }
15563
15564 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8_subtile) {
15565 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015566 for (uint32_t n = 1; n <= 4; n++) {
15567 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015568 GemmMicrokernelTester()
15569 .mr(3)
15570 .nr(4)
15571 .kr(8)
15572 .sr(1)
15573 .m(m)
15574 .n(n)
15575 .k(8)
15576 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015577 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015578 }
15579 }
15580 }
15581
15582 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8_subtile_m) {
15583 TEST_REQUIRES_X86_SSE2;
15584 for (uint32_t m = 1; m <= 3; m++) {
15585 GemmMicrokernelTester()
15586 .mr(3)
15587 .nr(4)
15588 .kr(8)
15589 .sr(1)
15590 .m(m)
15591 .n(4)
15592 .k(8)
15593 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015594 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015595 }
15596 }
15597
15598 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8_subtile_n) {
15599 TEST_REQUIRES_X86_SSE2;
15600 for (uint32_t n = 1; n <= 4; n++) {
15601 GemmMicrokernelTester()
15602 .mr(3)
15603 .nr(4)
15604 .kr(8)
15605 .sr(1)
15606 .m(3)
15607 .n(n)
15608 .k(8)
15609 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015610 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015611 }
15612 }
15613
15614 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_lt_8) {
15615 TEST_REQUIRES_X86_SSE2;
15616 for (size_t k = 1; k < 8; k++) {
15617 GemmMicrokernelTester()
15618 .mr(3)
15619 .nr(4)
15620 .kr(8)
15621 .sr(1)
15622 .m(3)
15623 .n(4)
15624 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015625 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015626 }
15627 }
15628
15629 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_lt_8_subtile) {
15630 TEST_REQUIRES_X86_SSE2;
15631 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015632 for (uint32_t n = 1; n <= 4; n++) {
15633 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015634 GemmMicrokernelTester()
15635 .mr(3)
15636 .nr(4)
15637 .kr(8)
15638 .sr(1)
15639 .m(m)
15640 .n(n)
15641 .k(k)
15642 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015643 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015644 }
15645 }
15646 }
15647 }
15648
15649 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_gt_8) {
15650 TEST_REQUIRES_X86_SSE2;
15651 for (size_t k = 9; k < 16; k++) {
15652 GemmMicrokernelTester()
15653 .mr(3)
15654 .nr(4)
15655 .kr(8)
15656 .sr(1)
15657 .m(3)
15658 .n(4)
15659 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015660 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015661 }
15662 }
15663
15664 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_gt_8_subtile) {
15665 TEST_REQUIRES_X86_SSE2;
15666 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015667 for (uint32_t n = 1; n <= 4; n++) {
15668 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015669 GemmMicrokernelTester()
15670 .mr(3)
15671 .nr(4)
15672 .kr(8)
15673 .sr(1)
15674 .m(m)
15675 .n(n)
15676 .k(k)
15677 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015678 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015679 }
15680 }
15681 }
15682 }
15683
15684 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_div_8) {
15685 TEST_REQUIRES_X86_SSE2;
15686 for (size_t k = 16; k <= 80; k += 8) {
15687 GemmMicrokernelTester()
15688 .mr(3)
15689 .nr(4)
15690 .kr(8)
15691 .sr(1)
15692 .m(3)
15693 .n(4)
15694 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015695 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015696 }
15697 }
15698
15699 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_div_8_subtile) {
15700 TEST_REQUIRES_X86_SSE2;
15701 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015702 for (uint32_t n = 1; n <= 4; n++) {
15703 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015704 GemmMicrokernelTester()
15705 .mr(3)
15706 .nr(4)
15707 .kr(8)
15708 .sr(1)
15709 .m(m)
15710 .n(n)
15711 .k(k)
15712 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015713 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015714 }
15715 }
15716 }
15717 }
15718
15719 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4) {
15720 TEST_REQUIRES_X86_SSE2;
15721 for (uint32_t n = 5; n < 8; n++) {
15722 for (size_t k = 1; k <= 40; k += 9) {
15723 GemmMicrokernelTester()
15724 .mr(3)
15725 .nr(4)
15726 .kr(8)
15727 .sr(1)
15728 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015729 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015730 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015731 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015732 }
15733 }
15734 }
15735
15736 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4_strided_cn) {
15737 TEST_REQUIRES_X86_SSE2;
15738 for (uint32_t n = 5; n < 8; n++) {
15739 for (size_t k = 1; k <= 40; k += 9) {
15740 GemmMicrokernelTester()
15741 .mr(3)
15742 .nr(4)
15743 .kr(8)
15744 .sr(1)
15745 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015746 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015747 .k(k)
15748 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015749 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015750 }
15751 }
15752 }
15753
15754 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4_subtile) {
15755 TEST_REQUIRES_X86_SSE2;
15756 for (uint32_t n = 5; n < 8; n++) {
15757 for (size_t k = 1; k <= 40; k += 9) {
15758 for (uint32_t m = 1; m <= 3; m++) {
15759 GemmMicrokernelTester()
15760 .mr(3)
15761 .nr(4)
15762 .kr(8)
15763 .sr(1)
15764 .m(m)
15765 .n(n)
15766 .k(k)
15767 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015768 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015769 }
15770 }
15771 }
15772 }
15773
15774 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4) {
15775 TEST_REQUIRES_X86_SSE2;
15776 for (uint32_t n = 8; n <= 12; n += 4) {
15777 for (size_t k = 1; k <= 40; k += 9) {
15778 GemmMicrokernelTester()
15779 .mr(3)
15780 .nr(4)
15781 .kr(8)
15782 .sr(1)
15783 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015784 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015785 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015786 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015787 }
15788 }
15789 }
15790
15791 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4_strided_cn) {
15792 TEST_REQUIRES_X86_SSE2;
15793 for (uint32_t n = 8; n <= 12; n += 4) {
15794 for (size_t k = 1; k <= 40; k += 9) {
15795 GemmMicrokernelTester()
15796 .mr(3)
15797 .nr(4)
15798 .kr(8)
15799 .sr(1)
15800 .m(3)
15801 .n(n)
15802 .k(k)
15803 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015804 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015805 }
15806 }
15807 }
15808
15809 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4_subtile) {
15810 TEST_REQUIRES_X86_SSE2;
15811 for (uint32_t n = 8; n <= 12; n += 4) {
15812 for (size_t k = 1; k <= 40; k += 9) {
15813 for (uint32_t m = 1; m <= 3; m++) {
15814 GemmMicrokernelTester()
15815 .mr(3)
15816 .nr(4)
15817 .kr(8)
15818 .sr(1)
15819 .m(m)
15820 .n(n)
15821 .k(k)
15822 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015823 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015824 }
15825 }
15826 }
15827 }
15828
15829 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, small_kernel) {
15830 TEST_REQUIRES_X86_SSE2;
15831 for (size_t k = 1; k <= 40; k += 9) {
15832 GemmMicrokernelTester()
15833 .mr(3)
15834 .nr(4)
15835 .kr(8)
15836 .sr(1)
15837 .m(3)
15838 .n(4)
15839 .k(k)
15840 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015841 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015842 }
15843 }
15844
15845 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, small_kernel_subtile) {
15846 TEST_REQUIRES_X86_SSE2;
15847 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015848 for (uint32_t n = 1; n <= 4; n++) {
15849 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015850 GemmMicrokernelTester()
15851 .mr(3)
15852 .nr(4)
15853 .kr(8)
15854 .sr(1)
15855 .m(m)
15856 .n(n)
15857 .k(k)
15858 .ks(3)
15859 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015860 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015861 }
15862 }
15863 }
15864 }
15865
15866 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4_small_kernel) {
15867 TEST_REQUIRES_X86_SSE2;
15868 for (uint32_t n = 5; n < 8; n++) {
15869 for (size_t k = 1; k <= 40; k += 9) {
15870 GemmMicrokernelTester()
15871 .mr(3)
15872 .nr(4)
15873 .kr(8)
15874 .sr(1)
15875 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015876 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015877 .k(k)
15878 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015879 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015880 }
15881 }
15882 }
15883
15884 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4_small_kernel) {
15885 TEST_REQUIRES_X86_SSE2;
15886 for (uint32_t n = 8; n <= 12; n += 4) {
15887 for (size_t k = 1; k <= 40; k += 9) {
15888 GemmMicrokernelTester()
15889 .mr(3)
15890 .nr(4)
15891 .kr(8)
15892 .sr(1)
15893 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015894 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015895 .k(k)
15896 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015897 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015898 }
15899 }
15900 }
15901
15902 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, strided_cm_subtile) {
15903 TEST_REQUIRES_X86_SSE2;
15904 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015905 for (uint32_t n = 1; n <= 4; n++) {
15906 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015907 GemmMicrokernelTester()
15908 .mr(3)
15909 .nr(4)
15910 .kr(8)
15911 .sr(1)
15912 .m(m)
15913 .n(n)
15914 .k(k)
15915 .cm_stride(7)
15916 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015917 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015918 }
15919 }
15920 }
15921 }
15922
15923 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, a_offset) {
15924 TEST_REQUIRES_X86_SSE2;
15925 for (size_t k = 1; k <= 40; k += 9) {
15926 GemmMicrokernelTester()
15927 .mr(3)
15928 .nr(4)
15929 .kr(8)
15930 .sr(1)
15931 .m(3)
15932 .n(4)
15933 .k(k)
15934 .ks(3)
15935 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080015936 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015937 }
15938 }
15939
15940 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, zero) {
15941 TEST_REQUIRES_X86_SSE2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015942 for (size_t k = 1; k <= 40; k += 9) {
15943 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015944 GemmMicrokernelTester()
15945 .mr(3)
15946 .nr(4)
15947 .kr(8)
15948 .sr(1)
15949 .m(3)
15950 .n(4)
15951 .k(k)
15952 .ks(3)
15953 .a_offset(127)
15954 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080015955 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015956 }
15957 }
15958 }
15959
15960 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, qmin) {
15961 TEST_REQUIRES_X86_SSE2;
15962 GemmMicrokernelTester()
15963 .mr(3)
15964 .nr(4)
15965 .kr(8)
15966 .sr(1)
15967 .m(3)
15968 .n(4)
15969 .k(8)
15970 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015971 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015972 }
15973
15974 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, qmax) {
15975 TEST_REQUIRES_X86_SSE2;
15976 GemmMicrokernelTester()
15977 .mr(3)
15978 .nr(4)
15979 .kr(8)
15980 .sr(1)
15981 .m(3)
15982 .n(4)
15983 .k(8)
15984 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015985 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070015986 }
15987
15988 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, strided_cm) {
15989 TEST_REQUIRES_X86_SSE2;
15990 GemmMicrokernelTester()
15991 .mr(3)
15992 .nr(4)
15993 .kr(8)
15994 .sr(1)
15995 .m(3)
15996 .n(4)
15997 .k(8)
15998 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080015999 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016000 }
16001
16002 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, no_a_zero_point) {
16003 TEST_REQUIRES_X86_SSE2;
16004 for (size_t k = 1; k <= 40; k += 9) {
16005 GemmMicrokernelTester()
16006 .mr(3)
16007 .nr(4)
16008 .kr(8)
16009 .sr(1)
16010 .m(3)
16011 .n(4)
16012 .k(k)
16013 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080016014 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016015 }
16016 }
16017
16018 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, no_b_zero_point) {
16019 TEST_REQUIRES_X86_SSE2;
16020 for (size_t k = 1; k <= 40; k += 9) {
16021 GemmMicrokernelTester()
16022 .mr(3)
16023 .nr(4)
16024 .kr(8)
16025 .sr(1)
16026 .m(3)
16027 .n(4)
16028 .k(k)
16029 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080016030 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016031 }
16032 }
16033
16034 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, no_zero_point) {
16035 TEST_REQUIRES_X86_SSE2;
16036 for (size_t k = 1; k <= 40; k += 9) {
16037 GemmMicrokernelTester()
16038 .mr(3)
16039 .nr(4)
16040 .kr(8)
16041 .sr(1)
16042 .m(3)
16043 .n(4)
16044 .k(k)
16045 .a_zero_point(0)
16046 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080016047 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016048 }
16049 }
16050#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16051
16052
16053#if XNN_ARCH_X86 || XNN_ARCH_X86_64
16054 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8) {
16055 TEST_REQUIRES_X86_SSE41;
16056 GemmMicrokernelTester()
16057 .mr(1)
16058 .nr(4)
16059 .kr(8)
16060 .sr(1)
16061 .m(1)
16062 .n(4)
16063 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080016064 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016065 }
16066
16067 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cn) {
16068 TEST_REQUIRES_X86_SSE41;
16069 GemmMicrokernelTester()
16070 .mr(1)
16071 .nr(4)
16072 .kr(8)
16073 .sr(1)
16074 .m(1)
16075 .n(4)
16076 .k(8)
16077 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016078 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016079 }
16080
16081 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile) {
16082 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016083 for (uint32_t n = 1; n <= 4; n++) {
16084 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016085 GemmMicrokernelTester()
16086 .mr(1)
16087 .nr(4)
16088 .kr(8)
16089 .sr(1)
16090 .m(m)
16091 .n(n)
16092 .k(8)
16093 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016094 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016095 }
16096 }
16097 }
16098
16099 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile_m) {
16100 TEST_REQUIRES_X86_SSE41;
16101 for (uint32_t m = 1; m <= 1; m++) {
16102 GemmMicrokernelTester()
16103 .mr(1)
16104 .nr(4)
16105 .kr(8)
16106 .sr(1)
16107 .m(m)
16108 .n(4)
16109 .k(8)
16110 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016111 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016112 }
16113 }
16114
16115 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile_n) {
16116 TEST_REQUIRES_X86_SSE41;
16117 for (uint32_t n = 1; n <= 4; n++) {
16118 GemmMicrokernelTester()
16119 .mr(1)
16120 .nr(4)
16121 .kr(8)
16122 .sr(1)
16123 .m(1)
16124 .n(n)
16125 .k(8)
16126 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016127 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016128 }
16129 }
16130
16131 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_lt_8) {
16132 TEST_REQUIRES_X86_SSE41;
16133 for (size_t k = 1; k < 8; k++) {
16134 GemmMicrokernelTester()
16135 .mr(1)
16136 .nr(4)
16137 .kr(8)
16138 .sr(1)
16139 .m(1)
16140 .n(4)
16141 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016142 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016143 }
16144 }
16145
16146 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_lt_8_subtile) {
16147 TEST_REQUIRES_X86_SSE41;
16148 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016149 for (uint32_t n = 1; n <= 4; n++) {
16150 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016151 GemmMicrokernelTester()
16152 .mr(1)
16153 .nr(4)
16154 .kr(8)
16155 .sr(1)
16156 .m(m)
16157 .n(n)
16158 .k(k)
16159 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016160 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016161 }
16162 }
16163 }
16164 }
16165
16166 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_gt_8) {
16167 TEST_REQUIRES_X86_SSE41;
16168 for (size_t k = 9; k < 16; k++) {
16169 GemmMicrokernelTester()
16170 .mr(1)
16171 .nr(4)
16172 .kr(8)
16173 .sr(1)
16174 .m(1)
16175 .n(4)
16176 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016177 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016178 }
16179 }
16180
16181 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_gt_8_subtile) {
16182 TEST_REQUIRES_X86_SSE41;
16183 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016184 for (uint32_t n = 1; n <= 4; n++) {
16185 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016186 GemmMicrokernelTester()
16187 .mr(1)
16188 .nr(4)
16189 .kr(8)
16190 .sr(1)
16191 .m(m)
16192 .n(n)
16193 .k(k)
16194 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016195 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016196 }
16197 }
16198 }
16199 }
16200
16201 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_div_8) {
16202 TEST_REQUIRES_X86_SSE41;
16203 for (size_t k = 16; k <= 80; k += 8) {
16204 GemmMicrokernelTester()
16205 .mr(1)
16206 .nr(4)
16207 .kr(8)
16208 .sr(1)
16209 .m(1)
16210 .n(4)
16211 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016212 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016213 }
16214 }
16215
16216 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_div_8_subtile) {
16217 TEST_REQUIRES_X86_SSE41;
16218 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016219 for (uint32_t n = 1; n <= 4; n++) {
16220 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016221 GemmMicrokernelTester()
16222 .mr(1)
16223 .nr(4)
16224 .kr(8)
16225 .sr(1)
16226 .m(m)
16227 .n(n)
16228 .k(k)
16229 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016230 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016231 }
16232 }
16233 }
16234 }
16235
16236 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4) {
16237 TEST_REQUIRES_X86_SSE41;
16238 for (uint32_t n = 5; n < 8; n++) {
16239 for (size_t k = 1; k <= 40; k += 9) {
16240 GemmMicrokernelTester()
16241 .mr(1)
16242 .nr(4)
16243 .kr(8)
16244 .sr(1)
16245 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016246 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016247 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016248 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016249 }
16250 }
16251 }
16252
16253 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_strided_cn) {
16254 TEST_REQUIRES_X86_SSE41;
16255 for (uint32_t n = 5; n < 8; n++) {
16256 for (size_t k = 1; k <= 40; k += 9) {
16257 GemmMicrokernelTester()
16258 .mr(1)
16259 .nr(4)
16260 .kr(8)
16261 .sr(1)
16262 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016263 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016264 .k(k)
16265 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016266 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016267 }
16268 }
16269 }
16270
16271 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_subtile) {
16272 TEST_REQUIRES_X86_SSE41;
16273 for (uint32_t n = 5; n < 8; n++) {
16274 for (size_t k = 1; k <= 40; k += 9) {
16275 for (uint32_t m = 1; m <= 1; m++) {
16276 GemmMicrokernelTester()
16277 .mr(1)
16278 .nr(4)
16279 .kr(8)
16280 .sr(1)
16281 .m(m)
16282 .n(n)
16283 .k(k)
16284 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016285 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016286 }
16287 }
16288 }
16289 }
16290
16291 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4) {
16292 TEST_REQUIRES_X86_SSE41;
16293 for (uint32_t n = 8; n <= 12; n += 4) {
16294 for (size_t k = 1; k <= 40; k += 9) {
16295 GemmMicrokernelTester()
16296 .mr(1)
16297 .nr(4)
16298 .kr(8)
16299 .sr(1)
16300 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016301 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016302 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016303 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016304 }
16305 }
16306 }
16307
16308 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_strided_cn) {
16309 TEST_REQUIRES_X86_SSE41;
16310 for (uint32_t n = 8; n <= 12; n += 4) {
16311 for (size_t k = 1; k <= 40; k += 9) {
16312 GemmMicrokernelTester()
16313 .mr(1)
16314 .nr(4)
16315 .kr(8)
16316 .sr(1)
16317 .m(1)
16318 .n(n)
16319 .k(k)
16320 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016321 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016322 }
16323 }
16324 }
16325
16326 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_subtile) {
16327 TEST_REQUIRES_X86_SSE41;
16328 for (uint32_t n = 8; n <= 12; n += 4) {
16329 for (size_t k = 1; k <= 40; k += 9) {
16330 for (uint32_t m = 1; m <= 1; m++) {
16331 GemmMicrokernelTester()
16332 .mr(1)
16333 .nr(4)
16334 .kr(8)
16335 .sr(1)
16336 .m(m)
16337 .n(n)
16338 .k(k)
16339 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016340 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016341 }
16342 }
16343 }
16344 }
16345
16346 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, small_kernel) {
16347 TEST_REQUIRES_X86_SSE41;
16348 for (size_t k = 1; k <= 40; k += 9) {
16349 GemmMicrokernelTester()
16350 .mr(1)
16351 .nr(4)
16352 .kr(8)
16353 .sr(1)
16354 .m(1)
16355 .n(4)
16356 .k(k)
16357 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080016358 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016359 }
16360 }
16361
16362 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, small_kernel_subtile) {
16363 TEST_REQUIRES_X86_SSE41;
16364 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016365 for (uint32_t n = 1; n <= 4; n++) {
16366 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016367 GemmMicrokernelTester()
16368 .mr(1)
16369 .nr(4)
16370 .kr(8)
16371 .sr(1)
16372 .m(m)
16373 .n(n)
16374 .k(k)
16375 .ks(3)
16376 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016377 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016378 }
16379 }
16380 }
16381 }
16382
16383 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_small_kernel) {
16384 TEST_REQUIRES_X86_SSE41;
16385 for (uint32_t n = 5; n < 8; n++) {
16386 for (size_t k = 1; k <= 40; k += 9) {
16387 GemmMicrokernelTester()
16388 .mr(1)
16389 .nr(4)
16390 .kr(8)
16391 .sr(1)
16392 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016393 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016394 .k(k)
16395 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080016396 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016397 }
16398 }
16399 }
16400
16401 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_small_kernel) {
16402 TEST_REQUIRES_X86_SSE41;
16403 for (uint32_t n = 8; n <= 12; n += 4) {
16404 for (size_t k = 1; k <= 40; k += 9) {
16405 GemmMicrokernelTester()
16406 .mr(1)
16407 .nr(4)
16408 .kr(8)
16409 .sr(1)
16410 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016411 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016412 .k(k)
16413 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080016414 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016415 }
16416 }
16417 }
16418
16419 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cm_subtile) {
16420 TEST_REQUIRES_X86_SSE41;
16421 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016422 for (uint32_t n = 1; n <= 4; n++) {
16423 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016424 GemmMicrokernelTester()
16425 .mr(1)
16426 .nr(4)
16427 .kr(8)
16428 .sr(1)
16429 .m(m)
16430 .n(n)
16431 .k(k)
16432 .cm_stride(7)
16433 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016434 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016435 }
16436 }
16437 }
16438 }
16439
16440 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, a_offset) {
16441 TEST_REQUIRES_X86_SSE41;
16442 for (size_t k = 1; k <= 40; k += 9) {
16443 GemmMicrokernelTester()
16444 .mr(1)
16445 .nr(4)
16446 .kr(8)
16447 .sr(1)
16448 .m(1)
16449 .n(4)
16450 .k(k)
16451 .ks(3)
16452 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080016453 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016454 }
16455 }
16456
16457 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, zero) {
16458 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016459 for (size_t k = 1; k <= 40; k += 9) {
16460 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016461 GemmMicrokernelTester()
16462 .mr(1)
16463 .nr(4)
16464 .kr(8)
16465 .sr(1)
16466 .m(1)
16467 .n(4)
16468 .k(k)
16469 .ks(3)
16470 .a_offset(43)
16471 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080016472 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016473 }
16474 }
16475 }
16476
16477 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, qmin) {
16478 TEST_REQUIRES_X86_SSE41;
16479 GemmMicrokernelTester()
16480 .mr(1)
16481 .nr(4)
16482 .kr(8)
16483 .sr(1)
16484 .m(1)
16485 .n(4)
16486 .k(8)
16487 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016488 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016489 }
16490
16491 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, qmax) {
16492 TEST_REQUIRES_X86_SSE41;
16493 GemmMicrokernelTester()
16494 .mr(1)
16495 .nr(4)
16496 .kr(8)
16497 .sr(1)
16498 .m(1)
16499 .n(4)
16500 .k(8)
16501 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016502 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016503 }
16504
16505 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cm) {
16506 TEST_REQUIRES_X86_SSE41;
16507 GemmMicrokernelTester()
16508 .mr(1)
16509 .nr(4)
16510 .kr(8)
16511 .sr(1)
16512 .m(1)
16513 .n(4)
16514 .k(8)
16515 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016516 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016517 }
16518
16519 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, no_a_zero_point) {
16520 TEST_REQUIRES_X86_SSE41;
16521 for (size_t k = 1; k <= 40; k += 9) {
16522 GemmMicrokernelTester()
16523 .mr(1)
16524 .nr(4)
16525 .kr(8)
16526 .sr(1)
16527 .m(1)
16528 .n(4)
16529 .k(k)
16530 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080016531 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016532 }
16533 }
16534
16535 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, no_b_zero_point) {
16536 TEST_REQUIRES_X86_SSE41;
16537 for (size_t k = 1; k <= 40; k += 9) {
16538 GemmMicrokernelTester()
16539 .mr(1)
16540 .nr(4)
16541 .kr(8)
16542 .sr(1)
16543 .m(1)
16544 .n(4)
16545 .k(k)
16546 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080016547 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016548 }
16549 }
16550
16551 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, no_zero_point) {
16552 TEST_REQUIRES_X86_SSE41;
16553 for (size_t k = 1; k <= 40; k += 9) {
16554 GemmMicrokernelTester()
16555 .mr(1)
16556 .nr(4)
16557 .kr(8)
16558 .sr(1)
16559 .m(1)
16560 .n(4)
16561 .k(k)
16562 .a_zero_point(0)
16563 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080016564 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016565 }
16566 }
16567#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16568
16569
16570#if XNN_ARCH_X86 || XNN_ARCH_X86_64
16571 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8) {
16572 TEST_REQUIRES_X86_SSE41;
16573 GemmMicrokernelTester()
16574 .mr(2)
16575 .nr(4)
16576 .kr(8)
16577 .sr(1)
16578 .m(2)
16579 .n(4)
16580 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080016581 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016582 }
16583
16584 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cn) {
16585 TEST_REQUIRES_X86_SSE41;
16586 GemmMicrokernelTester()
16587 .mr(2)
16588 .nr(4)
16589 .kr(8)
16590 .sr(1)
16591 .m(2)
16592 .n(4)
16593 .k(8)
16594 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016595 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016596 }
16597
16598 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile) {
16599 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016600 for (uint32_t n = 1; n <= 4; n++) {
16601 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016602 GemmMicrokernelTester()
16603 .mr(2)
16604 .nr(4)
16605 .kr(8)
16606 .sr(1)
16607 .m(m)
16608 .n(n)
16609 .k(8)
16610 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016611 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016612 }
16613 }
16614 }
16615
16616 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile_m) {
16617 TEST_REQUIRES_X86_SSE41;
16618 for (uint32_t m = 1; m <= 2; m++) {
16619 GemmMicrokernelTester()
16620 .mr(2)
16621 .nr(4)
16622 .kr(8)
16623 .sr(1)
16624 .m(m)
16625 .n(4)
16626 .k(8)
16627 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016628 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016629 }
16630 }
16631
16632 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile_n) {
16633 TEST_REQUIRES_X86_SSE41;
16634 for (uint32_t n = 1; n <= 4; n++) {
16635 GemmMicrokernelTester()
16636 .mr(2)
16637 .nr(4)
16638 .kr(8)
16639 .sr(1)
16640 .m(2)
16641 .n(n)
16642 .k(8)
16643 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016644 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016645 }
16646 }
16647
16648 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_lt_8) {
16649 TEST_REQUIRES_X86_SSE41;
16650 for (size_t k = 1; k < 8; k++) {
16651 GemmMicrokernelTester()
16652 .mr(2)
16653 .nr(4)
16654 .kr(8)
16655 .sr(1)
16656 .m(2)
16657 .n(4)
16658 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016659 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016660 }
16661 }
16662
16663 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_lt_8_subtile) {
16664 TEST_REQUIRES_X86_SSE41;
16665 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016666 for (uint32_t n = 1; n <= 4; n++) {
16667 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016668 GemmMicrokernelTester()
16669 .mr(2)
16670 .nr(4)
16671 .kr(8)
16672 .sr(1)
16673 .m(m)
16674 .n(n)
16675 .k(k)
16676 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016677 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016678 }
16679 }
16680 }
16681 }
16682
16683 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_gt_8) {
16684 TEST_REQUIRES_X86_SSE41;
16685 for (size_t k = 9; k < 16; k++) {
16686 GemmMicrokernelTester()
16687 .mr(2)
16688 .nr(4)
16689 .kr(8)
16690 .sr(1)
16691 .m(2)
16692 .n(4)
16693 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016694 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016695 }
16696 }
16697
16698 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_gt_8_subtile) {
16699 TEST_REQUIRES_X86_SSE41;
16700 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016701 for (uint32_t n = 1; n <= 4; n++) {
16702 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016703 GemmMicrokernelTester()
16704 .mr(2)
16705 .nr(4)
16706 .kr(8)
16707 .sr(1)
16708 .m(m)
16709 .n(n)
16710 .k(k)
16711 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016712 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016713 }
16714 }
16715 }
16716 }
16717
16718 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_div_8) {
16719 TEST_REQUIRES_X86_SSE41;
16720 for (size_t k = 16; k <= 80; k += 8) {
16721 GemmMicrokernelTester()
16722 .mr(2)
16723 .nr(4)
16724 .kr(8)
16725 .sr(1)
16726 .m(2)
16727 .n(4)
16728 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016729 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016730 }
16731 }
16732
16733 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_div_8_subtile) {
16734 TEST_REQUIRES_X86_SSE41;
16735 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016736 for (uint32_t n = 1; n <= 4; n++) {
16737 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016738 GemmMicrokernelTester()
16739 .mr(2)
16740 .nr(4)
16741 .kr(8)
16742 .sr(1)
16743 .m(m)
16744 .n(n)
16745 .k(k)
16746 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016747 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016748 }
16749 }
16750 }
16751 }
16752
16753 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4) {
16754 TEST_REQUIRES_X86_SSE41;
16755 for (uint32_t n = 5; n < 8; n++) {
16756 for (size_t k = 1; k <= 40; k += 9) {
16757 GemmMicrokernelTester()
16758 .mr(2)
16759 .nr(4)
16760 .kr(8)
16761 .sr(1)
16762 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016763 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016764 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016765 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016766 }
16767 }
16768 }
16769
16770 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_strided_cn) {
16771 TEST_REQUIRES_X86_SSE41;
16772 for (uint32_t n = 5; n < 8; n++) {
16773 for (size_t k = 1; k <= 40; k += 9) {
16774 GemmMicrokernelTester()
16775 .mr(2)
16776 .nr(4)
16777 .kr(8)
16778 .sr(1)
16779 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016780 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016781 .k(k)
16782 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016783 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016784 }
16785 }
16786 }
16787
16788 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_subtile) {
16789 TEST_REQUIRES_X86_SSE41;
16790 for (uint32_t n = 5; n < 8; n++) {
16791 for (size_t k = 1; k <= 40; k += 9) {
16792 for (uint32_t m = 1; m <= 2; m++) {
16793 GemmMicrokernelTester()
16794 .mr(2)
16795 .nr(4)
16796 .kr(8)
16797 .sr(1)
16798 .m(m)
16799 .n(n)
16800 .k(k)
16801 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016802 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016803 }
16804 }
16805 }
16806 }
16807
16808 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4) {
16809 TEST_REQUIRES_X86_SSE41;
16810 for (uint32_t n = 8; n <= 12; n += 4) {
16811 for (size_t k = 1; k <= 40; k += 9) {
16812 GemmMicrokernelTester()
16813 .mr(2)
16814 .nr(4)
16815 .kr(8)
16816 .sr(1)
16817 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016818 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016819 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016820 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016821 }
16822 }
16823 }
16824
16825 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_strided_cn) {
16826 TEST_REQUIRES_X86_SSE41;
16827 for (uint32_t n = 8; n <= 12; n += 4) {
16828 for (size_t k = 1; k <= 40; k += 9) {
16829 GemmMicrokernelTester()
16830 .mr(2)
16831 .nr(4)
16832 .kr(8)
16833 .sr(1)
16834 .m(2)
16835 .n(n)
16836 .k(k)
16837 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080016838 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016839 }
16840 }
16841 }
16842
16843 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_subtile) {
16844 TEST_REQUIRES_X86_SSE41;
16845 for (uint32_t n = 8; n <= 12; n += 4) {
16846 for (size_t k = 1; k <= 40; k += 9) {
16847 for (uint32_t m = 1; m <= 2; m++) {
16848 GemmMicrokernelTester()
16849 .mr(2)
16850 .nr(4)
16851 .kr(8)
16852 .sr(1)
16853 .m(m)
16854 .n(n)
16855 .k(k)
16856 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016857 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016858 }
16859 }
16860 }
16861 }
16862
16863 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, small_kernel) {
16864 TEST_REQUIRES_X86_SSE41;
16865 for (size_t k = 1; k <= 40; k += 9) {
16866 GemmMicrokernelTester()
16867 .mr(2)
16868 .nr(4)
16869 .kr(8)
16870 .sr(1)
16871 .m(2)
16872 .n(4)
16873 .k(k)
16874 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080016875 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016876 }
16877 }
16878
16879 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, small_kernel_subtile) {
16880 TEST_REQUIRES_X86_SSE41;
16881 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016882 for (uint32_t n = 1; n <= 4; n++) {
16883 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016884 GemmMicrokernelTester()
16885 .mr(2)
16886 .nr(4)
16887 .kr(8)
16888 .sr(1)
16889 .m(m)
16890 .n(n)
16891 .k(k)
16892 .ks(3)
16893 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016894 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016895 }
16896 }
16897 }
16898 }
16899
16900 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_small_kernel) {
16901 TEST_REQUIRES_X86_SSE41;
16902 for (uint32_t n = 5; n < 8; n++) {
16903 for (size_t k = 1; k <= 40; k += 9) {
16904 GemmMicrokernelTester()
16905 .mr(2)
16906 .nr(4)
16907 .kr(8)
16908 .sr(1)
16909 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016910 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016911 .k(k)
16912 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080016913 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016914 }
16915 }
16916 }
16917
16918 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_small_kernel) {
16919 TEST_REQUIRES_X86_SSE41;
16920 for (uint32_t n = 8; n <= 12; n += 4) {
16921 for (size_t k = 1; k <= 40; k += 9) {
16922 GemmMicrokernelTester()
16923 .mr(2)
16924 .nr(4)
16925 .kr(8)
16926 .sr(1)
16927 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016928 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016929 .k(k)
16930 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080016931 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016932 }
16933 }
16934 }
16935
16936 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cm_subtile) {
16937 TEST_REQUIRES_X86_SSE41;
16938 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016939 for (uint32_t n = 1; n <= 4; n++) {
16940 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016941 GemmMicrokernelTester()
16942 .mr(2)
16943 .nr(4)
16944 .kr(8)
16945 .sr(1)
16946 .m(m)
16947 .n(n)
16948 .k(k)
16949 .cm_stride(7)
16950 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016951 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016952 }
16953 }
16954 }
16955 }
16956
16957 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, a_offset) {
16958 TEST_REQUIRES_X86_SSE41;
16959 for (size_t k = 1; k <= 40; k += 9) {
16960 GemmMicrokernelTester()
16961 .mr(2)
16962 .nr(4)
16963 .kr(8)
16964 .sr(1)
16965 .m(2)
16966 .n(4)
16967 .k(k)
16968 .ks(3)
16969 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080016970 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016971 }
16972 }
16973
16974 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, zero) {
16975 TEST_REQUIRES_X86_SSE41;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016976 for (size_t k = 1; k <= 40; k += 9) {
16977 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016978 GemmMicrokernelTester()
16979 .mr(2)
16980 .nr(4)
16981 .kr(8)
16982 .sr(1)
16983 .m(2)
16984 .n(4)
16985 .k(k)
16986 .ks(3)
16987 .a_offset(83)
16988 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080016989 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070016990 }
16991 }
16992 }
16993
16994 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, qmin) {
16995 TEST_REQUIRES_X86_SSE41;
16996 GemmMicrokernelTester()
16997 .mr(2)
16998 .nr(4)
16999 .kr(8)
17000 .sr(1)
17001 .m(2)
17002 .n(4)
17003 .k(8)
17004 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017005 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017006 }
17007
17008 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, qmax) {
17009 TEST_REQUIRES_X86_SSE41;
17010 GemmMicrokernelTester()
17011 .mr(2)
17012 .nr(4)
17013 .kr(8)
17014 .sr(1)
17015 .m(2)
17016 .n(4)
17017 .k(8)
17018 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017019 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017020 }
17021
17022 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cm) {
17023 TEST_REQUIRES_X86_SSE41;
17024 GemmMicrokernelTester()
17025 .mr(2)
17026 .nr(4)
17027 .kr(8)
17028 .sr(1)
17029 .m(2)
17030 .n(4)
17031 .k(8)
17032 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017033 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017034 }
17035
17036 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, no_a_zero_point) {
17037 TEST_REQUIRES_X86_SSE41;
17038 for (size_t k = 1; k <= 40; k += 9) {
17039 GemmMicrokernelTester()
17040 .mr(2)
17041 .nr(4)
17042 .kr(8)
17043 .sr(1)
17044 .m(2)
17045 .n(4)
17046 .k(k)
17047 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080017048 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017049 }
17050 }
17051
17052 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, no_b_zero_point) {
17053 TEST_REQUIRES_X86_SSE41;
17054 for (size_t k = 1; k <= 40; k += 9) {
17055 GemmMicrokernelTester()
17056 .mr(2)
17057 .nr(4)
17058 .kr(8)
17059 .sr(1)
17060 .m(2)
17061 .n(4)
17062 .k(k)
17063 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080017064 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017065 }
17066 }
17067
17068 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, no_zero_point) {
17069 TEST_REQUIRES_X86_SSE41;
17070 for (size_t k = 1; k <= 40; k += 9) {
17071 GemmMicrokernelTester()
17072 .mr(2)
17073 .nr(4)
17074 .kr(8)
17075 .sr(1)
17076 .m(2)
17077 .n(4)
17078 .k(k)
17079 .a_zero_point(0)
17080 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080017081 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017082 }
17083 }
17084#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17085
17086
17087#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017088 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8) {
17089 TEST_REQUIRES_X86_AVX;
17090 GemmMicrokernelTester()
17091 .mr(1)
17092 .nr(4)
17093 .kr(8)
17094 .sr(1)
17095 .m(1)
17096 .n(4)
17097 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080017098 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017099 }
17100
17101 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cn) {
17102 TEST_REQUIRES_X86_AVX;
17103 GemmMicrokernelTester()
17104 .mr(1)
17105 .nr(4)
17106 .kr(8)
17107 .sr(1)
17108 .m(1)
17109 .n(4)
17110 .k(8)
17111 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017112 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017113 }
17114
17115 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile) {
17116 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017117 for (uint32_t n = 1; n <= 4; n++) {
17118 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017119 GemmMicrokernelTester()
17120 .mr(1)
17121 .nr(4)
17122 .kr(8)
17123 .sr(1)
17124 .m(m)
17125 .n(n)
17126 .k(8)
17127 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017128 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017129 }
17130 }
17131 }
17132
17133 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile_m) {
17134 TEST_REQUIRES_X86_AVX;
17135 for (uint32_t m = 1; m <= 1; m++) {
17136 GemmMicrokernelTester()
17137 .mr(1)
17138 .nr(4)
17139 .kr(8)
17140 .sr(1)
17141 .m(m)
17142 .n(4)
17143 .k(8)
17144 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017145 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017146 }
17147 }
17148
17149 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile_n) {
17150 TEST_REQUIRES_X86_AVX;
17151 for (uint32_t n = 1; n <= 4; n++) {
17152 GemmMicrokernelTester()
17153 .mr(1)
17154 .nr(4)
17155 .kr(8)
17156 .sr(1)
17157 .m(1)
17158 .n(n)
17159 .k(8)
17160 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017161 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017162 }
17163 }
17164
17165 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_lt_8) {
17166 TEST_REQUIRES_X86_AVX;
17167 for (size_t k = 1; k < 8; k++) {
17168 GemmMicrokernelTester()
17169 .mr(1)
17170 .nr(4)
17171 .kr(8)
17172 .sr(1)
17173 .m(1)
17174 .n(4)
17175 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017176 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017177 }
17178 }
17179
17180 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_lt_8_subtile) {
17181 TEST_REQUIRES_X86_AVX;
17182 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017183 for (uint32_t n = 1; n <= 4; n++) {
17184 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017185 GemmMicrokernelTester()
17186 .mr(1)
17187 .nr(4)
17188 .kr(8)
17189 .sr(1)
17190 .m(m)
17191 .n(n)
17192 .k(k)
17193 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017194 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017195 }
17196 }
17197 }
17198 }
17199
17200 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_gt_8) {
17201 TEST_REQUIRES_X86_AVX;
17202 for (size_t k = 9; k < 16; k++) {
17203 GemmMicrokernelTester()
17204 .mr(1)
17205 .nr(4)
17206 .kr(8)
17207 .sr(1)
17208 .m(1)
17209 .n(4)
17210 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017211 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017212 }
17213 }
17214
17215 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_gt_8_subtile) {
17216 TEST_REQUIRES_X86_AVX;
17217 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017218 for (uint32_t n = 1; n <= 4; n++) {
17219 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017220 GemmMicrokernelTester()
17221 .mr(1)
17222 .nr(4)
17223 .kr(8)
17224 .sr(1)
17225 .m(m)
17226 .n(n)
17227 .k(k)
17228 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017229 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017230 }
17231 }
17232 }
17233 }
17234
17235 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_div_8) {
17236 TEST_REQUIRES_X86_AVX;
17237 for (size_t k = 16; k <= 80; k += 8) {
17238 GemmMicrokernelTester()
17239 .mr(1)
17240 .nr(4)
17241 .kr(8)
17242 .sr(1)
17243 .m(1)
17244 .n(4)
17245 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017246 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017247 }
17248 }
17249
17250 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_div_8_subtile) {
17251 TEST_REQUIRES_X86_AVX;
17252 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017253 for (uint32_t n = 1; n <= 4; n++) {
17254 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017255 GemmMicrokernelTester()
17256 .mr(1)
17257 .nr(4)
17258 .kr(8)
17259 .sr(1)
17260 .m(m)
17261 .n(n)
17262 .k(k)
17263 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017264 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017265 }
17266 }
17267 }
17268 }
17269
17270 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4) {
17271 TEST_REQUIRES_X86_AVX;
17272 for (uint32_t n = 5; n < 8; n++) {
17273 for (size_t k = 1; k <= 40; k += 9) {
17274 GemmMicrokernelTester()
17275 .mr(1)
17276 .nr(4)
17277 .kr(8)
17278 .sr(1)
17279 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017280 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017281 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017282 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017283 }
17284 }
17285 }
17286
17287 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_strided_cn) {
17288 TEST_REQUIRES_X86_AVX;
17289 for (uint32_t n = 5; n < 8; n++) {
17290 for (size_t k = 1; k <= 40; k += 9) {
17291 GemmMicrokernelTester()
17292 .mr(1)
17293 .nr(4)
17294 .kr(8)
17295 .sr(1)
17296 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017297 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017298 .k(k)
17299 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017300 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017301 }
17302 }
17303 }
17304
17305 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_subtile) {
17306 TEST_REQUIRES_X86_AVX;
17307 for (uint32_t n = 5; n < 8; n++) {
17308 for (size_t k = 1; k <= 40; k += 9) {
17309 for (uint32_t m = 1; m <= 1; m++) {
17310 GemmMicrokernelTester()
17311 .mr(1)
17312 .nr(4)
17313 .kr(8)
17314 .sr(1)
17315 .m(m)
17316 .n(n)
17317 .k(k)
17318 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017319 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017320 }
17321 }
17322 }
17323 }
17324
17325 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4) {
17326 TEST_REQUIRES_X86_AVX;
17327 for (uint32_t n = 8; n <= 12; n += 4) {
17328 for (size_t k = 1; k <= 40; k += 9) {
17329 GemmMicrokernelTester()
17330 .mr(1)
17331 .nr(4)
17332 .kr(8)
17333 .sr(1)
17334 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017335 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017336 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017337 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017338 }
17339 }
17340 }
17341
17342 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_strided_cn) {
17343 TEST_REQUIRES_X86_AVX;
17344 for (uint32_t n = 8; n <= 12; n += 4) {
17345 for (size_t k = 1; k <= 40; k += 9) {
17346 GemmMicrokernelTester()
17347 .mr(1)
17348 .nr(4)
17349 .kr(8)
17350 .sr(1)
17351 .m(1)
17352 .n(n)
17353 .k(k)
17354 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017355 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017356 }
17357 }
17358 }
17359
17360 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_subtile) {
17361 TEST_REQUIRES_X86_AVX;
17362 for (uint32_t n = 8; n <= 12; n += 4) {
17363 for (size_t k = 1; k <= 40; k += 9) {
17364 for (uint32_t m = 1; m <= 1; m++) {
17365 GemmMicrokernelTester()
17366 .mr(1)
17367 .nr(4)
17368 .kr(8)
17369 .sr(1)
17370 .m(m)
17371 .n(n)
17372 .k(k)
17373 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017374 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017375 }
17376 }
17377 }
17378 }
17379
17380 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, small_kernel) {
17381 TEST_REQUIRES_X86_AVX;
17382 for (size_t k = 1; k <= 40; k += 9) {
17383 GemmMicrokernelTester()
17384 .mr(1)
17385 .nr(4)
17386 .kr(8)
17387 .sr(1)
17388 .m(1)
17389 .n(4)
17390 .k(k)
17391 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080017392 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017393 }
17394 }
17395
17396 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, small_kernel_subtile) {
17397 TEST_REQUIRES_X86_AVX;
17398 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017399 for (uint32_t n = 1; n <= 4; n++) {
17400 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017401 GemmMicrokernelTester()
17402 .mr(1)
17403 .nr(4)
17404 .kr(8)
17405 .sr(1)
17406 .m(m)
17407 .n(n)
17408 .k(k)
17409 .ks(3)
17410 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017411 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017412 }
17413 }
17414 }
17415 }
17416
17417 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_small_kernel) {
17418 TEST_REQUIRES_X86_AVX;
17419 for (uint32_t n = 5; n < 8; n++) {
17420 for (size_t k = 1; k <= 40; k += 9) {
17421 GemmMicrokernelTester()
17422 .mr(1)
17423 .nr(4)
17424 .kr(8)
17425 .sr(1)
17426 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017427 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017428 .k(k)
17429 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080017430 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017431 }
17432 }
17433 }
17434
17435 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_small_kernel) {
17436 TEST_REQUIRES_X86_AVX;
17437 for (uint32_t n = 8; n <= 12; n += 4) {
17438 for (size_t k = 1; k <= 40; k += 9) {
17439 GemmMicrokernelTester()
17440 .mr(1)
17441 .nr(4)
17442 .kr(8)
17443 .sr(1)
17444 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017445 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017446 .k(k)
17447 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080017448 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017449 }
17450 }
17451 }
17452
17453 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cm_subtile) {
17454 TEST_REQUIRES_X86_AVX;
17455 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017456 for (uint32_t n = 1; n <= 4; n++) {
17457 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017458 GemmMicrokernelTester()
17459 .mr(1)
17460 .nr(4)
17461 .kr(8)
17462 .sr(1)
17463 .m(m)
17464 .n(n)
17465 .k(k)
17466 .cm_stride(7)
17467 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017468 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017469 }
17470 }
17471 }
17472 }
17473
17474 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, a_offset) {
17475 TEST_REQUIRES_X86_AVX;
17476 for (size_t k = 1; k <= 40; k += 9) {
17477 GemmMicrokernelTester()
17478 .mr(1)
17479 .nr(4)
17480 .kr(8)
17481 .sr(1)
17482 .m(1)
17483 .n(4)
17484 .k(k)
17485 .ks(3)
17486 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080017487 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017488 }
17489 }
17490
17491 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, zero) {
17492 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017493 for (size_t k = 1; k <= 40; k += 9) {
17494 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017495 GemmMicrokernelTester()
17496 .mr(1)
17497 .nr(4)
17498 .kr(8)
17499 .sr(1)
17500 .m(1)
17501 .n(4)
17502 .k(k)
17503 .ks(3)
17504 .a_offset(43)
17505 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080017506 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017507 }
17508 }
17509 }
17510
17511 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, qmin) {
17512 TEST_REQUIRES_X86_AVX;
17513 GemmMicrokernelTester()
17514 .mr(1)
17515 .nr(4)
17516 .kr(8)
17517 .sr(1)
17518 .m(1)
17519 .n(4)
17520 .k(8)
17521 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017522 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017523 }
17524
17525 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, qmax) {
17526 TEST_REQUIRES_X86_AVX;
17527 GemmMicrokernelTester()
17528 .mr(1)
17529 .nr(4)
17530 .kr(8)
17531 .sr(1)
17532 .m(1)
17533 .n(4)
17534 .k(8)
17535 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017536 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017537 }
17538
17539 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cm) {
17540 TEST_REQUIRES_X86_AVX;
17541 GemmMicrokernelTester()
17542 .mr(1)
17543 .nr(4)
17544 .kr(8)
17545 .sr(1)
17546 .m(1)
17547 .n(4)
17548 .k(8)
17549 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017550 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017551 }
17552
17553 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, no_a_zero_point) {
17554 TEST_REQUIRES_X86_AVX;
17555 for (size_t k = 1; k <= 40; k += 9) {
17556 GemmMicrokernelTester()
17557 .mr(1)
17558 .nr(4)
17559 .kr(8)
17560 .sr(1)
17561 .m(1)
17562 .n(4)
17563 .k(k)
17564 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080017565 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017566 }
17567 }
17568
17569 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, no_b_zero_point) {
17570 TEST_REQUIRES_X86_AVX;
17571 for (size_t k = 1; k <= 40; k += 9) {
17572 GemmMicrokernelTester()
17573 .mr(1)
17574 .nr(4)
17575 .kr(8)
17576 .sr(1)
17577 .m(1)
17578 .n(4)
17579 .k(k)
17580 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080017581 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017582 }
17583 }
17584
17585 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, no_zero_point) {
17586 TEST_REQUIRES_X86_AVX;
17587 for (size_t k = 1; k <= 40; k += 9) {
17588 GemmMicrokernelTester()
17589 .mr(1)
17590 .nr(4)
17591 .kr(8)
17592 .sr(1)
17593 .m(1)
17594 .n(4)
17595 .k(k)
17596 .a_zero_point(0)
17597 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080017598 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017599 }
17600 }
17601#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17602
17603
17604#if XNN_ARCH_X86 || XNN_ARCH_X86_64
17605 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8) {
17606 TEST_REQUIRES_X86_AVX;
17607 GemmMicrokernelTester()
17608 .mr(2)
17609 .nr(4)
17610 .kr(8)
17611 .sr(1)
17612 .m(2)
17613 .n(4)
17614 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080017615 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017616 }
17617
17618 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cn) {
17619 TEST_REQUIRES_X86_AVX;
17620 GemmMicrokernelTester()
17621 .mr(2)
17622 .nr(4)
17623 .kr(8)
17624 .sr(1)
17625 .m(2)
17626 .n(4)
17627 .k(8)
17628 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017629 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017630 }
17631
17632 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile) {
17633 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017634 for (uint32_t n = 1; n <= 4; n++) {
17635 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017636 GemmMicrokernelTester()
17637 .mr(2)
17638 .nr(4)
17639 .kr(8)
17640 .sr(1)
17641 .m(m)
17642 .n(n)
17643 .k(8)
17644 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017645 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017646 }
17647 }
17648 }
17649
17650 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile_m) {
17651 TEST_REQUIRES_X86_AVX;
17652 for (uint32_t m = 1; m <= 2; m++) {
17653 GemmMicrokernelTester()
17654 .mr(2)
17655 .nr(4)
17656 .kr(8)
17657 .sr(1)
17658 .m(m)
17659 .n(4)
17660 .k(8)
17661 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017662 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017663 }
17664 }
17665
17666 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile_n) {
17667 TEST_REQUIRES_X86_AVX;
17668 for (uint32_t n = 1; n <= 4; n++) {
17669 GemmMicrokernelTester()
17670 .mr(2)
17671 .nr(4)
17672 .kr(8)
17673 .sr(1)
17674 .m(2)
17675 .n(n)
17676 .k(8)
17677 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017678 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017679 }
17680 }
17681
17682 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_lt_8) {
17683 TEST_REQUIRES_X86_AVX;
17684 for (size_t k = 1; k < 8; k++) {
17685 GemmMicrokernelTester()
17686 .mr(2)
17687 .nr(4)
17688 .kr(8)
17689 .sr(1)
17690 .m(2)
17691 .n(4)
17692 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017693 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017694 }
17695 }
17696
17697 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_lt_8_subtile) {
17698 TEST_REQUIRES_X86_AVX;
17699 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017700 for (uint32_t n = 1; n <= 4; n++) {
17701 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017702 GemmMicrokernelTester()
17703 .mr(2)
17704 .nr(4)
17705 .kr(8)
17706 .sr(1)
17707 .m(m)
17708 .n(n)
17709 .k(k)
17710 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017711 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017712 }
17713 }
17714 }
17715 }
17716
17717 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_gt_8) {
17718 TEST_REQUIRES_X86_AVX;
17719 for (size_t k = 9; k < 16; k++) {
17720 GemmMicrokernelTester()
17721 .mr(2)
17722 .nr(4)
17723 .kr(8)
17724 .sr(1)
17725 .m(2)
17726 .n(4)
17727 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017728 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017729 }
17730 }
17731
17732 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_gt_8_subtile) {
17733 TEST_REQUIRES_X86_AVX;
17734 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017735 for (uint32_t n = 1; n <= 4; n++) {
17736 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017737 GemmMicrokernelTester()
17738 .mr(2)
17739 .nr(4)
17740 .kr(8)
17741 .sr(1)
17742 .m(m)
17743 .n(n)
17744 .k(k)
17745 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017746 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017747 }
17748 }
17749 }
17750 }
17751
17752 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_div_8) {
17753 TEST_REQUIRES_X86_AVX;
17754 for (size_t k = 16; k <= 80; k += 8) {
17755 GemmMicrokernelTester()
17756 .mr(2)
17757 .nr(4)
17758 .kr(8)
17759 .sr(1)
17760 .m(2)
17761 .n(4)
17762 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017763 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017764 }
17765 }
17766
17767 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_div_8_subtile) {
17768 TEST_REQUIRES_X86_AVX;
17769 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017770 for (uint32_t n = 1; n <= 4; n++) {
17771 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017772 GemmMicrokernelTester()
17773 .mr(2)
17774 .nr(4)
17775 .kr(8)
17776 .sr(1)
17777 .m(m)
17778 .n(n)
17779 .k(k)
17780 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017781 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017782 }
17783 }
17784 }
17785 }
17786
17787 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4) {
17788 TEST_REQUIRES_X86_AVX;
17789 for (uint32_t n = 5; n < 8; n++) {
17790 for (size_t k = 1; k <= 40; k += 9) {
17791 GemmMicrokernelTester()
17792 .mr(2)
17793 .nr(4)
17794 .kr(8)
17795 .sr(1)
17796 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017797 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017798 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017799 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017800 }
17801 }
17802 }
17803
17804 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_strided_cn) {
17805 TEST_REQUIRES_X86_AVX;
17806 for (uint32_t n = 5; n < 8; n++) {
17807 for (size_t k = 1; k <= 40; k += 9) {
17808 GemmMicrokernelTester()
17809 .mr(2)
17810 .nr(4)
17811 .kr(8)
17812 .sr(1)
17813 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017814 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017815 .k(k)
17816 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017817 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017818 }
17819 }
17820 }
17821
17822 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_subtile) {
17823 TEST_REQUIRES_X86_AVX;
17824 for (uint32_t n = 5; n < 8; n++) {
17825 for (size_t k = 1; k <= 40; k += 9) {
17826 for (uint32_t m = 1; m <= 2; m++) {
17827 GemmMicrokernelTester()
17828 .mr(2)
17829 .nr(4)
17830 .kr(8)
17831 .sr(1)
17832 .m(m)
17833 .n(n)
17834 .k(k)
17835 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017836 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017837 }
17838 }
17839 }
17840 }
17841
17842 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4) {
17843 TEST_REQUIRES_X86_AVX;
17844 for (uint32_t n = 8; n <= 12; n += 4) {
17845 for (size_t k = 1; k <= 40; k += 9) {
17846 GemmMicrokernelTester()
17847 .mr(2)
17848 .nr(4)
17849 .kr(8)
17850 .sr(1)
17851 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017852 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017853 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017854 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017855 }
17856 }
17857 }
17858
17859 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_strided_cn) {
17860 TEST_REQUIRES_X86_AVX;
17861 for (uint32_t n = 8; n <= 12; n += 4) {
17862 for (size_t k = 1; k <= 40; k += 9) {
17863 GemmMicrokernelTester()
17864 .mr(2)
17865 .nr(4)
17866 .kr(8)
17867 .sr(1)
17868 .m(2)
17869 .n(n)
17870 .k(k)
17871 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080017872 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017873 }
17874 }
17875 }
17876
17877 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_subtile) {
17878 TEST_REQUIRES_X86_AVX;
17879 for (uint32_t n = 8; n <= 12; n += 4) {
17880 for (size_t k = 1; k <= 40; k += 9) {
17881 for (uint32_t m = 1; m <= 2; m++) {
17882 GemmMicrokernelTester()
17883 .mr(2)
17884 .nr(4)
17885 .kr(8)
17886 .sr(1)
17887 .m(m)
17888 .n(n)
17889 .k(k)
17890 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017891 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017892 }
17893 }
17894 }
17895 }
17896
17897 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, small_kernel) {
17898 TEST_REQUIRES_X86_AVX;
17899 for (size_t k = 1; k <= 40; k += 9) {
17900 GemmMicrokernelTester()
17901 .mr(2)
17902 .nr(4)
17903 .kr(8)
17904 .sr(1)
17905 .m(2)
17906 .n(4)
17907 .k(k)
17908 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080017909 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017910 }
17911 }
17912
17913 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, small_kernel_subtile) {
17914 TEST_REQUIRES_X86_AVX;
17915 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017916 for (uint32_t n = 1; n <= 4; n++) {
17917 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017918 GemmMicrokernelTester()
17919 .mr(2)
17920 .nr(4)
17921 .kr(8)
17922 .sr(1)
17923 .m(m)
17924 .n(n)
17925 .k(k)
17926 .ks(3)
17927 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017928 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017929 }
17930 }
17931 }
17932 }
17933
17934 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_small_kernel) {
17935 TEST_REQUIRES_X86_AVX;
17936 for (uint32_t n = 5; n < 8; n++) {
17937 for (size_t k = 1; k <= 40; k += 9) {
17938 GemmMicrokernelTester()
17939 .mr(2)
17940 .nr(4)
17941 .kr(8)
17942 .sr(1)
17943 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017944 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017945 .k(k)
17946 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080017947 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017948 }
17949 }
17950 }
17951
17952 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_small_kernel) {
17953 TEST_REQUIRES_X86_AVX;
17954 for (uint32_t n = 8; n <= 12; n += 4) {
17955 for (size_t k = 1; k <= 40; k += 9) {
17956 GemmMicrokernelTester()
17957 .mr(2)
17958 .nr(4)
17959 .kr(8)
17960 .sr(1)
17961 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017962 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017963 .k(k)
17964 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080017965 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017966 }
17967 }
17968 }
17969
17970 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cm_subtile) {
17971 TEST_REQUIRES_X86_AVX;
17972 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017973 for (uint32_t n = 1; n <= 4; n++) {
17974 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017975 GemmMicrokernelTester()
17976 .mr(2)
17977 .nr(4)
17978 .kr(8)
17979 .sr(1)
17980 .m(m)
17981 .n(n)
17982 .k(k)
17983 .cm_stride(7)
17984 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017985 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070017986 }
17987 }
17988 }
17989 }
17990
17991 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, a_offset) {
17992 TEST_REQUIRES_X86_AVX;
17993 for (size_t k = 1; k <= 40; k += 9) {
17994 GemmMicrokernelTester()
17995 .mr(2)
17996 .nr(4)
17997 .kr(8)
17998 .sr(1)
17999 .m(2)
18000 .n(4)
18001 .k(k)
18002 .ks(3)
18003 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080018004 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018005 }
18006 }
18007
18008 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, zero) {
18009 TEST_REQUIRES_X86_AVX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018010 for (size_t k = 1; k <= 40; k += 9) {
18011 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018012 GemmMicrokernelTester()
18013 .mr(2)
18014 .nr(4)
18015 .kr(8)
18016 .sr(1)
18017 .m(2)
18018 .n(4)
18019 .k(k)
18020 .ks(3)
18021 .a_offset(83)
18022 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080018023 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018024 }
18025 }
18026 }
18027
18028 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, qmin) {
18029 TEST_REQUIRES_X86_AVX;
18030 GemmMicrokernelTester()
18031 .mr(2)
18032 .nr(4)
18033 .kr(8)
18034 .sr(1)
18035 .m(2)
18036 .n(4)
18037 .k(8)
18038 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018039 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018040 }
18041
18042 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, qmax) {
18043 TEST_REQUIRES_X86_AVX;
18044 GemmMicrokernelTester()
18045 .mr(2)
18046 .nr(4)
18047 .kr(8)
18048 .sr(1)
18049 .m(2)
18050 .n(4)
18051 .k(8)
18052 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018053 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018054 }
18055
18056 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cm) {
18057 TEST_REQUIRES_X86_AVX;
18058 GemmMicrokernelTester()
18059 .mr(2)
18060 .nr(4)
18061 .kr(8)
18062 .sr(1)
18063 .m(2)
18064 .n(4)
18065 .k(8)
18066 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080018067 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018068 }
18069
18070 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, no_a_zero_point) {
18071 TEST_REQUIRES_X86_AVX;
18072 for (size_t k = 1; k <= 40; k += 9) {
18073 GemmMicrokernelTester()
18074 .mr(2)
18075 .nr(4)
18076 .kr(8)
18077 .sr(1)
18078 .m(2)
18079 .n(4)
18080 .k(k)
18081 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080018082 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018083 }
18084 }
18085
18086 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, no_b_zero_point) {
18087 TEST_REQUIRES_X86_AVX;
18088 for (size_t k = 1; k <= 40; k += 9) {
18089 GemmMicrokernelTester()
18090 .mr(2)
18091 .nr(4)
18092 .kr(8)
18093 .sr(1)
18094 .m(2)
18095 .n(4)
18096 .k(k)
18097 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080018098 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018099 }
18100 }
18101
18102 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, no_zero_point) {
18103 TEST_REQUIRES_X86_AVX;
18104 for (size_t k = 1; k <= 40; k += 9) {
18105 GemmMicrokernelTester()
18106 .mr(2)
18107 .nr(4)
18108 .kr(8)
18109 .sr(1)
18110 .m(2)
18111 .n(4)
18112 .k(k)
18113 .a_zero_point(0)
18114 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080018115 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018116 }
18117 }
18118#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18119
18120
18121#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018122 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8) {
18123 TEST_REQUIRES_X86_XOP;
18124 GemmMicrokernelTester()
18125 .mr(3)
18126 .nr(4)
18127 .kr(8)
18128 .sr(1)
18129 .m(3)
18130 .n(4)
18131 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080018132 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018133 }
18134
18135 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cn) {
18136 TEST_REQUIRES_X86_XOP;
18137 GemmMicrokernelTester()
18138 .mr(3)
18139 .nr(4)
18140 .kr(8)
18141 .sr(1)
18142 .m(3)
18143 .n(4)
18144 .k(8)
18145 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080018146 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018147 }
18148
18149 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile) {
18150 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018151 for (uint32_t n = 1; n <= 4; n++) {
18152 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018153 GemmMicrokernelTester()
18154 .mr(3)
18155 .nr(4)
18156 .kr(8)
18157 .sr(1)
18158 .m(m)
18159 .n(n)
18160 .k(8)
18161 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018162 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018163 }
18164 }
18165 }
18166
18167 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile_m) {
18168 TEST_REQUIRES_X86_XOP;
18169 for (uint32_t m = 1; m <= 3; m++) {
18170 GemmMicrokernelTester()
18171 .mr(3)
18172 .nr(4)
18173 .kr(8)
18174 .sr(1)
18175 .m(m)
18176 .n(4)
18177 .k(8)
18178 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018179 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018180 }
18181 }
18182
18183 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile_n) {
18184 TEST_REQUIRES_X86_XOP;
18185 for (uint32_t n = 1; n <= 4; n++) {
18186 GemmMicrokernelTester()
18187 .mr(3)
18188 .nr(4)
18189 .kr(8)
18190 .sr(1)
18191 .m(3)
18192 .n(n)
18193 .k(8)
18194 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018195 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018196 }
18197 }
18198
18199 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_lt_8) {
18200 TEST_REQUIRES_X86_XOP;
18201 for (size_t k = 1; k < 8; k++) {
18202 GemmMicrokernelTester()
18203 .mr(3)
18204 .nr(4)
18205 .kr(8)
18206 .sr(1)
18207 .m(3)
18208 .n(4)
18209 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018210 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018211 }
18212 }
18213
18214 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_lt_8_subtile) {
18215 TEST_REQUIRES_X86_XOP;
18216 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018217 for (uint32_t n = 1; n <= 4; n++) {
18218 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018219 GemmMicrokernelTester()
18220 .mr(3)
18221 .nr(4)
18222 .kr(8)
18223 .sr(1)
18224 .m(m)
18225 .n(n)
18226 .k(k)
18227 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018228 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018229 }
18230 }
18231 }
18232 }
18233
18234 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_gt_8) {
18235 TEST_REQUIRES_X86_XOP;
18236 for (size_t k = 9; k < 16; k++) {
18237 GemmMicrokernelTester()
18238 .mr(3)
18239 .nr(4)
18240 .kr(8)
18241 .sr(1)
18242 .m(3)
18243 .n(4)
18244 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018245 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018246 }
18247 }
18248
18249 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_gt_8_subtile) {
18250 TEST_REQUIRES_X86_XOP;
18251 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018252 for (uint32_t n = 1; n <= 4; n++) {
18253 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018254 GemmMicrokernelTester()
18255 .mr(3)
18256 .nr(4)
18257 .kr(8)
18258 .sr(1)
18259 .m(m)
18260 .n(n)
18261 .k(k)
18262 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018263 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018264 }
18265 }
18266 }
18267 }
18268
18269 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_div_8) {
18270 TEST_REQUIRES_X86_XOP;
18271 for (size_t k = 16; k <= 80; k += 8) {
18272 GemmMicrokernelTester()
18273 .mr(3)
18274 .nr(4)
18275 .kr(8)
18276 .sr(1)
18277 .m(3)
18278 .n(4)
18279 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018280 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018281 }
18282 }
18283
18284 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_div_8_subtile) {
18285 TEST_REQUIRES_X86_XOP;
18286 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018287 for (uint32_t n = 1; n <= 4; n++) {
18288 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018289 GemmMicrokernelTester()
18290 .mr(3)
18291 .nr(4)
18292 .kr(8)
18293 .sr(1)
18294 .m(m)
18295 .n(n)
18296 .k(k)
18297 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018298 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018299 }
18300 }
18301 }
18302 }
18303
18304 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4) {
18305 TEST_REQUIRES_X86_XOP;
18306 for (uint32_t n = 5; n < 8; n++) {
18307 for (size_t k = 1; k <= 40; k += 9) {
18308 GemmMicrokernelTester()
18309 .mr(3)
18310 .nr(4)
18311 .kr(8)
18312 .sr(1)
18313 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018314 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018315 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018316 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018317 }
18318 }
18319 }
18320
18321 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_strided_cn) {
18322 TEST_REQUIRES_X86_XOP;
18323 for (uint32_t n = 5; n < 8; n++) {
18324 for (size_t k = 1; k <= 40; k += 9) {
18325 GemmMicrokernelTester()
18326 .mr(3)
18327 .nr(4)
18328 .kr(8)
18329 .sr(1)
18330 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018331 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018332 .k(k)
18333 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080018334 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018335 }
18336 }
18337 }
18338
18339 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_subtile) {
18340 TEST_REQUIRES_X86_XOP;
18341 for (uint32_t n = 5; n < 8; n++) {
18342 for (size_t k = 1; k <= 40; k += 9) {
18343 for (uint32_t m = 1; m <= 3; m++) {
18344 GemmMicrokernelTester()
18345 .mr(3)
18346 .nr(4)
18347 .kr(8)
18348 .sr(1)
18349 .m(m)
18350 .n(n)
18351 .k(k)
18352 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018353 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018354 }
18355 }
18356 }
18357 }
18358
18359 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4) {
18360 TEST_REQUIRES_X86_XOP;
18361 for (uint32_t n = 8; n <= 12; n += 4) {
18362 for (size_t k = 1; k <= 40; k += 9) {
18363 GemmMicrokernelTester()
18364 .mr(3)
18365 .nr(4)
18366 .kr(8)
18367 .sr(1)
18368 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018369 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018370 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018371 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018372 }
18373 }
18374 }
18375
18376 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_strided_cn) {
18377 TEST_REQUIRES_X86_XOP;
18378 for (uint32_t n = 8; n <= 12; n += 4) {
18379 for (size_t k = 1; k <= 40; k += 9) {
18380 GemmMicrokernelTester()
18381 .mr(3)
18382 .nr(4)
18383 .kr(8)
18384 .sr(1)
18385 .m(3)
18386 .n(n)
18387 .k(k)
18388 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080018389 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018390 }
18391 }
18392 }
18393
18394 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_subtile) {
18395 TEST_REQUIRES_X86_XOP;
18396 for (uint32_t n = 8; n <= 12; n += 4) {
18397 for (size_t k = 1; k <= 40; k += 9) {
18398 for (uint32_t m = 1; m <= 3; m++) {
18399 GemmMicrokernelTester()
18400 .mr(3)
18401 .nr(4)
18402 .kr(8)
18403 .sr(1)
18404 .m(m)
18405 .n(n)
18406 .k(k)
18407 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018408 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018409 }
18410 }
18411 }
18412 }
18413
18414 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, small_kernel) {
18415 TEST_REQUIRES_X86_XOP;
18416 for (size_t k = 1; k <= 40; k += 9) {
18417 GemmMicrokernelTester()
18418 .mr(3)
18419 .nr(4)
18420 .kr(8)
18421 .sr(1)
18422 .m(3)
18423 .n(4)
18424 .k(k)
18425 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080018426 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018427 }
18428 }
18429
18430 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, small_kernel_subtile) {
18431 TEST_REQUIRES_X86_XOP;
18432 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018433 for (uint32_t n = 1; n <= 4; n++) {
18434 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018435 GemmMicrokernelTester()
18436 .mr(3)
18437 .nr(4)
18438 .kr(8)
18439 .sr(1)
18440 .m(m)
18441 .n(n)
18442 .k(k)
18443 .ks(3)
18444 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018445 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018446 }
18447 }
18448 }
18449 }
18450
18451 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_small_kernel) {
18452 TEST_REQUIRES_X86_XOP;
18453 for (uint32_t n = 5; n < 8; n++) {
18454 for (size_t k = 1; k <= 40; k += 9) {
18455 GemmMicrokernelTester()
18456 .mr(3)
18457 .nr(4)
18458 .kr(8)
18459 .sr(1)
18460 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018461 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018462 .k(k)
18463 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080018464 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018465 }
18466 }
18467 }
18468
18469 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_small_kernel) {
18470 TEST_REQUIRES_X86_XOP;
18471 for (uint32_t n = 8; n <= 12; n += 4) {
18472 for (size_t k = 1; k <= 40; k += 9) {
18473 GemmMicrokernelTester()
18474 .mr(3)
18475 .nr(4)
18476 .kr(8)
18477 .sr(1)
18478 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018479 .n(n)
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018480 .k(k)
18481 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080018482 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018483 }
18484 }
18485 }
18486
18487 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cm_subtile) {
18488 TEST_REQUIRES_X86_XOP;
18489 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018490 for (uint32_t n = 1; n <= 4; n++) {
18491 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018492 GemmMicrokernelTester()
18493 .mr(3)
18494 .nr(4)
18495 .kr(8)
18496 .sr(1)
18497 .m(m)
18498 .n(n)
18499 .k(k)
18500 .cm_stride(7)
18501 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018502 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018503 }
18504 }
18505 }
18506 }
18507
18508 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, a_offset) {
18509 TEST_REQUIRES_X86_XOP;
18510 for (size_t k = 1; k <= 40; k += 9) {
18511 GemmMicrokernelTester()
18512 .mr(3)
18513 .nr(4)
18514 .kr(8)
18515 .sr(1)
18516 .m(3)
18517 .n(4)
18518 .k(k)
18519 .ks(3)
18520 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080018521 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018522 }
18523 }
18524
18525 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, zero) {
18526 TEST_REQUIRES_X86_XOP;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018527 for (size_t k = 1; k <= 40; k += 9) {
18528 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018529 GemmMicrokernelTester()
18530 .mr(3)
18531 .nr(4)
18532 .kr(8)
18533 .sr(1)
18534 .m(3)
18535 .n(4)
18536 .k(k)
18537 .ks(3)
18538 .a_offset(127)
18539 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080018540 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018541 }
18542 }
18543 }
18544
18545 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, qmin) {
18546 TEST_REQUIRES_X86_XOP;
18547 GemmMicrokernelTester()
18548 .mr(3)
18549 .nr(4)
18550 .kr(8)
18551 .sr(1)
18552 .m(3)
18553 .n(4)
18554 .k(8)
18555 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018556 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018557 }
18558
18559 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, qmax) {
18560 TEST_REQUIRES_X86_XOP;
18561 GemmMicrokernelTester()
18562 .mr(3)
18563 .nr(4)
18564 .kr(8)
18565 .sr(1)
18566 .m(3)
18567 .n(4)
18568 .k(8)
18569 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018570 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018571 }
18572
18573 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cm) {
18574 TEST_REQUIRES_X86_XOP;
18575 GemmMicrokernelTester()
18576 .mr(3)
18577 .nr(4)
18578 .kr(8)
18579 .sr(1)
18580 .m(3)
18581 .n(4)
18582 .k(8)
18583 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080018584 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018585 }
18586
18587 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, no_a_zero_point) {
18588 TEST_REQUIRES_X86_XOP;
18589 for (size_t k = 1; k <= 40; k += 9) {
18590 GemmMicrokernelTester()
18591 .mr(3)
18592 .nr(4)
18593 .kr(8)
18594 .sr(1)
18595 .m(3)
18596 .n(4)
18597 .k(k)
18598 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080018599 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018600 }
18601 }
18602
18603 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, no_b_zero_point) {
18604 TEST_REQUIRES_X86_XOP;
18605 for (size_t k = 1; k <= 40; k += 9) {
18606 GemmMicrokernelTester()
18607 .mr(3)
18608 .nr(4)
18609 .kr(8)
18610 .sr(1)
18611 .m(3)
18612 .n(4)
18613 .k(k)
18614 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080018615 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018616 }
18617 }
18618
18619 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, no_zero_point) {
18620 TEST_REQUIRES_X86_XOP;
18621 for (size_t k = 1; k <= 40; k += 9) {
18622 GemmMicrokernelTester()
18623 .mr(3)
18624 .nr(4)
18625 .kr(8)
18626 .sr(1)
18627 .m(3)
18628 .n(4)
18629 .k(k)
18630 .a_zero_point(0)
18631 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080018632 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qu8_conv_minmax_fp32_sse2_params, xnn_qu8_requantize_fp32);
Marat Dukhanef47f8d2021-07-02 15:08:32 -070018633 }
18634 }
18635#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018636
18637
18638#if XNN_ARCH_X86 || XNN_ARCH_X86_64
18639 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8) {
18640 TEST_REQUIRES_X86_AVX2;
18641 GemmMicrokernelTester()
18642 .mr(1)
18643 .nr(8)
18644 .kr(8)
18645 .sr(1)
18646 .m(1)
18647 .n(8)
18648 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080018649 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018650 }
18651
18652 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cn) {
18653 TEST_REQUIRES_X86_AVX2;
18654 GemmMicrokernelTester()
18655 .mr(1)
18656 .nr(8)
18657 .kr(8)
18658 .sr(1)
18659 .m(1)
18660 .n(8)
18661 .k(8)
18662 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080018663 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018664 }
18665
18666 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile) {
18667 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018668 for (uint32_t n = 1; n <= 8; n++) {
18669 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018670 GemmMicrokernelTester()
18671 .mr(1)
18672 .nr(8)
18673 .kr(8)
18674 .sr(1)
18675 .m(m)
18676 .n(n)
18677 .k(8)
18678 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018679 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018680 }
18681 }
18682 }
18683
18684 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_m) {
18685 TEST_REQUIRES_X86_AVX2;
18686 for (uint32_t m = 1; m <= 1; m++) {
18687 GemmMicrokernelTester()
18688 .mr(1)
18689 .nr(8)
18690 .kr(8)
18691 .sr(1)
18692 .m(m)
18693 .n(8)
18694 .k(8)
18695 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018696 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018697 }
18698 }
18699
18700 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_n) {
18701 TEST_REQUIRES_X86_AVX2;
18702 for (uint32_t n = 1; n <= 8; n++) {
18703 GemmMicrokernelTester()
18704 .mr(1)
18705 .nr(8)
18706 .kr(8)
18707 .sr(1)
18708 .m(1)
18709 .n(n)
18710 .k(8)
18711 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018712 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018713 }
18714 }
18715
18716 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8) {
18717 TEST_REQUIRES_X86_AVX2;
18718 for (size_t k = 1; k < 8; k++) {
18719 GemmMicrokernelTester()
18720 .mr(1)
18721 .nr(8)
18722 .kr(8)
18723 .sr(1)
18724 .m(1)
18725 .n(8)
18726 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018727 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018728 }
18729 }
18730
18731 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8_subtile) {
18732 TEST_REQUIRES_X86_AVX2;
18733 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018734 for (uint32_t n = 1; n <= 8; n++) {
18735 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018736 GemmMicrokernelTester()
18737 .mr(1)
18738 .nr(8)
18739 .kr(8)
18740 .sr(1)
18741 .m(m)
18742 .n(n)
18743 .k(k)
18744 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018745 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018746 }
18747 }
18748 }
18749 }
18750
18751 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8) {
18752 TEST_REQUIRES_X86_AVX2;
18753 for (size_t k = 9; k < 16; k++) {
18754 GemmMicrokernelTester()
18755 .mr(1)
18756 .nr(8)
18757 .kr(8)
18758 .sr(1)
18759 .m(1)
18760 .n(8)
18761 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018762 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018763 }
18764 }
18765
18766 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8_subtile) {
18767 TEST_REQUIRES_X86_AVX2;
18768 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018769 for (uint32_t n = 1; n <= 8; n++) {
18770 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018771 GemmMicrokernelTester()
18772 .mr(1)
18773 .nr(8)
18774 .kr(8)
18775 .sr(1)
18776 .m(m)
18777 .n(n)
18778 .k(k)
18779 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018780 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018781 }
18782 }
18783 }
18784 }
18785
18786 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8) {
18787 TEST_REQUIRES_X86_AVX2;
18788 for (size_t k = 16; k <= 80; k += 8) {
18789 GemmMicrokernelTester()
18790 .mr(1)
18791 .nr(8)
18792 .kr(8)
18793 .sr(1)
18794 .m(1)
18795 .n(8)
18796 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018797 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018798 }
18799 }
18800
18801 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8_subtile) {
18802 TEST_REQUIRES_X86_AVX2;
18803 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018804 for (uint32_t n = 1; n <= 8; n++) {
18805 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018806 GemmMicrokernelTester()
18807 .mr(1)
18808 .nr(8)
18809 .kr(8)
18810 .sr(1)
18811 .m(m)
18812 .n(n)
18813 .k(k)
18814 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018815 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018816 }
18817 }
18818 }
18819 }
18820
18821 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8) {
18822 TEST_REQUIRES_X86_AVX2;
18823 for (uint32_t n = 9; n < 16; n++) {
18824 for (size_t k = 1; k <= 40; k += 9) {
18825 GemmMicrokernelTester()
18826 .mr(1)
18827 .nr(8)
18828 .kr(8)
18829 .sr(1)
18830 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018831 .n(n)
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018832 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018833 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018834 }
18835 }
18836 }
18837
18838 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_strided_cn) {
18839 TEST_REQUIRES_X86_AVX2;
18840 for (uint32_t n = 9; n < 16; n++) {
18841 for (size_t k = 1; k <= 40; k += 9) {
18842 GemmMicrokernelTester()
18843 .mr(1)
18844 .nr(8)
18845 .kr(8)
18846 .sr(1)
18847 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018848 .n(n)
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018849 .k(k)
18850 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080018851 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018852 }
18853 }
18854 }
18855
18856 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_subtile) {
18857 TEST_REQUIRES_X86_AVX2;
18858 for (uint32_t n = 9; n < 16; n++) {
18859 for (size_t k = 1; k <= 40; k += 9) {
18860 for (uint32_t m = 1; m <= 1; m++) {
18861 GemmMicrokernelTester()
18862 .mr(1)
18863 .nr(8)
18864 .kr(8)
18865 .sr(1)
18866 .m(m)
18867 .n(n)
18868 .k(k)
18869 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018870 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018871 }
18872 }
18873 }
18874 }
18875
18876 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8) {
18877 TEST_REQUIRES_X86_AVX2;
18878 for (uint32_t n = 16; n <= 24; n += 8) {
18879 for (size_t k = 1; k <= 40; k += 9) {
18880 GemmMicrokernelTester()
18881 .mr(1)
18882 .nr(8)
18883 .kr(8)
18884 .sr(1)
18885 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018886 .n(n)
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018887 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018888 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018889 }
18890 }
18891 }
18892
18893 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_strided_cn) {
18894 TEST_REQUIRES_X86_AVX2;
18895 for (uint32_t n = 16; n <= 24; n += 8) {
18896 for (size_t k = 1; k <= 40; k += 9) {
18897 GemmMicrokernelTester()
18898 .mr(1)
18899 .nr(8)
18900 .kr(8)
18901 .sr(1)
18902 .m(1)
18903 .n(n)
18904 .k(k)
18905 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080018906 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018907 }
18908 }
18909 }
18910
18911 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_subtile) {
18912 TEST_REQUIRES_X86_AVX2;
18913 for (uint32_t n = 16; n <= 24; n += 8) {
18914 for (size_t k = 1; k <= 40; k += 9) {
18915 for (uint32_t m = 1; m <= 1; m++) {
18916 GemmMicrokernelTester()
18917 .mr(1)
18918 .nr(8)
18919 .kr(8)
18920 .sr(1)
18921 .m(m)
18922 .n(n)
18923 .k(k)
18924 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018925 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018926 }
18927 }
18928 }
18929 }
18930
18931 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, small_kernel) {
18932 TEST_REQUIRES_X86_AVX2;
18933 for (size_t k = 1; k <= 40; k += 9) {
18934 GemmMicrokernelTester()
18935 .mr(1)
18936 .nr(8)
18937 .kr(8)
18938 .sr(1)
18939 .m(1)
18940 .n(8)
18941 .k(k)
18942 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080018943 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018944 }
18945 }
18946
18947 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, small_kernel_subtile) {
18948 TEST_REQUIRES_X86_AVX2;
18949 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018950 for (uint32_t n = 1; n <= 8; n++) {
18951 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018952 GemmMicrokernelTester()
18953 .mr(1)
18954 .nr(8)
18955 .kr(8)
18956 .sr(1)
18957 .m(m)
18958 .n(n)
18959 .k(k)
18960 .ks(3)
18961 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018962 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018963 }
18964 }
18965 }
18966 }
18967
18968 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_small_kernel) {
18969 TEST_REQUIRES_X86_AVX2;
18970 for (uint32_t n = 9; n < 16; n++) {
18971 for (size_t k = 1; k <= 40; k += 9) {
18972 GemmMicrokernelTester()
18973 .mr(1)
18974 .nr(8)
18975 .kr(8)
18976 .sr(1)
18977 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018978 .n(n)
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018979 .k(k)
18980 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080018981 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018982 }
18983 }
18984 }
18985
18986 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_small_kernel) {
18987 TEST_REQUIRES_X86_AVX2;
18988 for (uint32_t n = 16; n <= 24; n += 8) {
18989 for (size_t k = 1; k <= 40; k += 9) {
18990 GemmMicrokernelTester()
18991 .mr(1)
18992 .nr(8)
18993 .kr(8)
18994 .sr(1)
18995 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018996 .n(n)
Marat Dukhan902ef7f2021-07-02 16:11:06 -070018997 .k(k)
18998 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080018999 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070019000 }
19001 }
19002 }
19003
19004 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cm_subtile) {
19005 TEST_REQUIRES_X86_AVX2;
19006 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019007 for (uint32_t n = 1; n <= 8; n++) {
19008 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan902ef7f2021-07-02 16:11:06 -070019009 GemmMicrokernelTester()
19010 .mr(1)
19011 .nr(8)
19012 .kr(8)
19013 .sr(1)
19014 .m(m)
19015 .n(n)
19016 .k(k)
19017 .cm_stride(11)
19018 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019019 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070019020 }
19021 }
19022 }
19023 }
19024
19025 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, a_offset) {
19026 TEST_REQUIRES_X86_AVX2;
19027 for (size_t k = 1; k <= 40; k += 9) {
19028 GemmMicrokernelTester()
19029 .mr(1)
19030 .nr(8)
19031 .kr(8)
19032 .sr(1)
19033 .m(1)
19034 .n(8)
19035 .k(k)
19036 .ks(3)
19037 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080019038 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070019039 }
19040 }
19041
19042 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, zero) {
19043 TEST_REQUIRES_X86_AVX2;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019044 for (size_t k = 1; k <= 40; k += 9) {
19045 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan902ef7f2021-07-02 16:11:06 -070019046 GemmMicrokernelTester()
19047 .mr(1)
19048 .nr(8)
19049 .kr(8)
19050 .sr(1)
19051 .m(1)
19052 .n(8)
19053 .k(k)
19054 .ks(3)
19055 .a_offset(43)
19056 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080019057 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070019058 }
19059 }
19060 }
19061
19062 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, qmin) {
19063 TEST_REQUIRES_X86_AVX2;
19064 GemmMicrokernelTester()
19065 .mr(1)
19066 .nr(8)
19067 .kr(8)
19068 .sr(1)
19069 .m(1)
19070 .n(8)
19071 .k(8)
19072 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019073 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070019074 }
19075
19076 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, qmax) {
19077 TEST_REQUIRES_X86_AVX2;
19078 GemmMicrokernelTester()
19079 .mr(1)
19080 .nr(8)
19081 .kr(8)
19082 .sr(1)
19083 .m(1)
19084 .n(8)
19085 .k(8)
19086 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019087 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070019088 }
19089
19090 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cm) {
19091 TEST_REQUIRES_X86_AVX2;
19092 GemmMicrokernelTester()
19093 .mr(1)
19094 .nr(8)
19095 .kr(8)
19096 .sr(1)
19097 .m(1)
19098 .n(8)
19099 .k(8)
19100 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080019101 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070019102 }
19103
19104 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, no_a_zero_point) {
19105 TEST_REQUIRES_X86_AVX2;
19106 for (size_t k = 1; k <= 40; k += 9) {
19107 GemmMicrokernelTester()
19108 .mr(1)
19109 .nr(8)
19110 .kr(8)
19111 .sr(1)
19112 .m(1)
19113 .n(8)
19114 .k(k)
19115 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080019116 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070019117 }
19118 }
19119
19120 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, no_b_zero_point) {
19121 TEST_REQUIRES_X86_AVX2;
19122 for (size_t k = 1; k <= 40; k += 9) {
19123 GemmMicrokernelTester()
19124 .mr(1)
19125 .nr(8)
19126 .kr(8)
19127 .sr(1)
19128 .m(1)
19129 .n(8)
19130 .k(k)
19131 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080019132 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070019133 }
19134 }
19135
19136 TEST(QU8_IGEMM_MINMAX_FP32_1X8C8__AVX2, no_zero_point) {
19137 TEST_REQUIRES_X86_AVX2;
19138 for (size_t k = 1; k <= 40; k += 9) {
19139 GemmMicrokernelTester()
19140 .mr(1)
19141 .nr(8)
19142 .kr(8)
19143 .sr(1)
19144 .m(1)
19145 .n(8)
19146 .k(k)
19147 .a_zero_point(0)
19148 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080019149 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qu8_conv_minmax_fp32_avx2_params, xnn_qu8_requantize_fp32);
Marat Dukhan902ef7f2021-07-02 16:11:06 -070019150 }
19151 }
19152#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19153
19154
19155#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019156 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8) {
19157 TEST_REQUIRES_X86_AVX512SKX;
19158 GemmMicrokernelTester()
19159 .mr(1)
19160 .nr(16)
19161 .kr(8)
19162 .sr(1)
19163 .m(1)
19164 .n(16)
19165 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080019166 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019167 }
19168
19169 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, strided_cn) {
19170 TEST_REQUIRES_X86_AVX512SKX;
19171 GemmMicrokernelTester()
19172 .mr(1)
19173 .nr(16)
19174 .kr(8)
19175 .sr(1)
19176 .m(1)
19177 .n(16)
19178 .k(8)
19179 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019180 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019181 }
19182
19183 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_subtile) {
19184 TEST_REQUIRES_X86_AVX512SKX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019185 for (uint32_t n = 1; n <= 16; n++) {
19186 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019187 GemmMicrokernelTester()
19188 .mr(1)
19189 .nr(16)
19190 .kr(8)
19191 .sr(1)
19192 .m(m)
19193 .n(n)
19194 .k(8)
19195 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019196 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019197 }
19198 }
19199 }
19200
19201 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_subtile_m) {
19202 TEST_REQUIRES_X86_AVX512SKX;
19203 for (uint32_t m = 1; m <= 1; m++) {
19204 GemmMicrokernelTester()
19205 .mr(1)
19206 .nr(16)
19207 .kr(8)
19208 .sr(1)
19209 .m(m)
19210 .n(16)
19211 .k(8)
19212 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019213 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019214 }
19215 }
19216
19217 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_subtile_n) {
19218 TEST_REQUIRES_X86_AVX512SKX;
19219 for (uint32_t n = 1; n <= 16; n++) {
19220 GemmMicrokernelTester()
19221 .mr(1)
19222 .nr(16)
19223 .kr(8)
19224 .sr(1)
19225 .m(1)
19226 .n(n)
19227 .k(8)
19228 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019229 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019230 }
19231 }
19232
19233 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_lt_8) {
19234 TEST_REQUIRES_X86_AVX512SKX;
19235 for (size_t k = 1; k < 8; k++) {
19236 GemmMicrokernelTester()
19237 .mr(1)
19238 .nr(16)
19239 .kr(8)
19240 .sr(1)
19241 .m(1)
19242 .n(16)
19243 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019244 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019245 }
19246 }
19247
19248 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_lt_8_subtile) {
19249 TEST_REQUIRES_X86_AVX512SKX;
19250 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019251 for (uint32_t n = 1; n <= 16; n++) {
19252 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019253 GemmMicrokernelTester()
19254 .mr(1)
19255 .nr(16)
19256 .kr(8)
19257 .sr(1)
19258 .m(m)
19259 .n(n)
19260 .k(k)
19261 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019262 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019263 }
19264 }
19265 }
19266 }
19267
19268 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_gt_8) {
19269 TEST_REQUIRES_X86_AVX512SKX;
19270 for (size_t k = 9; k < 16; k++) {
19271 GemmMicrokernelTester()
19272 .mr(1)
19273 .nr(16)
19274 .kr(8)
19275 .sr(1)
19276 .m(1)
19277 .n(16)
19278 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019279 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019280 }
19281 }
19282
19283 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_gt_8_subtile) {
19284 TEST_REQUIRES_X86_AVX512SKX;
19285 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019286 for (uint32_t n = 1; n <= 16; n++) {
19287 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019288 GemmMicrokernelTester()
19289 .mr(1)
19290 .nr(16)
19291 .kr(8)
19292 .sr(1)
19293 .m(m)
19294 .n(n)
19295 .k(k)
19296 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019297 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019298 }
19299 }
19300 }
19301 }
19302
19303 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_div_8) {
19304 TEST_REQUIRES_X86_AVX512SKX;
19305 for (size_t k = 16; k <= 80; k += 8) {
19306 GemmMicrokernelTester()
19307 .mr(1)
19308 .nr(16)
19309 .kr(8)
19310 .sr(1)
19311 .m(1)
19312 .n(16)
19313 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019314 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019315 }
19316 }
19317
19318 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_div_8_subtile) {
19319 TEST_REQUIRES_X86_AVX512SKX;
19320 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019321 for (uint32_t n = 1; n <= 16; n++) {
19322 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019323 GemmMicrokernelTester()
19324 .mr(1)
19325 .nr(16)
19326 .kr(8)
19327 .sr(1)
19328 .m(m)
19329 .n(n)
19330 .k(k)
19331 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019332 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019333 }
19334 }
19335 }
19336 }
19337
19338 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16) {
19339 TEST_REQUIRES_X86_AVX512SKX;
19340 for (uint32_t n = 17; n < 32; n++) {
19341 for (size_t k = 1; k <= 40; k += 9) {
19342 GemmMicrokernelTester()
19343 .mr(1)
19344 .nr(16)
19345 .kr(8)
19346 .sr(1)
19347 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019348 .n(n)
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019349 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019350 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019351 }
19352 }
19353 }
19354
19355 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16_strided_cn) {
19356 TEST_REQUIRES_X86_AVX512SKX;
19357 for (uint32_t n = 17; n < 32; n++) {
19358 for (size_t k = 1; k <= 40; k += 9) {
19359 GemmMicrokernelTester()
19360 .mr(1)
19361 .nr(16)
19362 .kr(8)
19363 .sr(1)
19364 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019365 .n(n)
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019366 .k(k)
19367 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019368 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019369 }
19370 }
19371 }
19372
19373 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16_subtile) {
19374 TEST_REQUIRES_X86_AVX512SKX;
19375 for (uint32_t n = 17; n < 32; n++) {
19376 for (size_t k = 1; k <= 40; k += 9) {
19377 for (uint32_t m = 1; m <= 1; m++) {
19378 GemmMicrokernelTester()
19379 .mr(1)
19380 .nr(16)
19381 .kr(8)
19382 .sr(1)
19383 .m(m)
19384 .n(n)
19385 .k(k)
19386 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019387 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019388 }
19389 }
19390 }
19391 }
19392
19393 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16) {
19394 TEST_REQUIRES_X86_AVX512SKX;
19395 for (uint32_t n = 32; n <= 48; n += 16) {
19396 for (size_t k = 1; k <= 40; k += 9) {
19397 GemmMicrokernelTester()
19398 .mr(1)
19399 .nr(16)
19400 .kr(8)
19401 .sr(1)
19402 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019403 .n(n)
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019404 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019405 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019406 }
19407 }
19408 }
19409
19410 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16_strided_cn) {
19411 TEST_REQUIRES_X86_AVX512SKX;
19412 for (uint32_t n = 32; n <= 48; n += 16) {
19413 for (size_t k = 1; k <= 40; k += 9) {
19414 GemmMicrokernelTester()
19415 .mr(1)
19416 .nr(16)
19417 .kr(8)
19418 .sr(1)
19419 .m(1)
19420 .n(n)
19421 .k(k)
19422 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019423 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019424 }
19425 }
19426 }
19427
19428 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16_subtile) {
19429 TEST_REQUIRES_X86_AVX512SKX;
19430 for (uint32_t n = 32; n <= 48; n += 16) {
19431 for (size_t k = 1; k <= 40; k += 9) {
19432 for (uint32_t m = 1; m <= 1; m++) {
19433 GemmMicrokernelTester()
19434 .mr(1)
19435 .nr(16)
19436 .kr(8)
19437 .sr(1)
19438 .m(m)
19439 .n(n)
19440 .k(k)
19441 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019442 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019443 }
19444 }
19445 }
19446 }
19447
19448 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, small_kernel) {
19449 TEST_REQUIRES_X86_AVX512SKX;
19450 for (size_t k = 1; k <= 40; k += 9) {
19451 GemmMicrokernelTester()
19452 .mr(1)
19453 .nr(16)
19454 .kr(8)
19455 .sr(1)
19456 .m(1)
19457 .n(16)
19458 .k(k)
19459 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080019460 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019461 }
19462 }
19463
19464 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, small_kernel_subtile) {
19465 TEST_REQUIRES_X86_AVX512SKX;
19466 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019467 for (uint32_t n = 1; n <= 16; n++) {
19468 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019469 GemmMicrokernelTester()
19470 .mr(1)
19471 .nr(16)
19472 .kr(8)
19473 .sr(1)
19474 .m(m)
19475 .n(n)
19476 .k(k)
19477 .ks(3)
19478 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019479 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019480 }
19481 }
19482 }
19483 }
19484
19485 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16_small_kernel) {
19486 TEST_REQUIRES_X86_AVX512SKX;
19487 for (uint32_t n = 17; n < 32; n++) {
19488 for (size_t k = 1; k <= 40; k += 9) {
19489 GemmMicrokernelTester()
19490 .mr(1)
19491 .nr(16)
19492 .kr(8)
19493 .sr(1)
19494 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019495 .n(n)
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019496 .k(k)
19497 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080019498 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019499 }
19500 }
19501 }
19502
19503 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16_small_kernel) {
19504 TEST_REQUIRES_X86_AVX512SKX;
19505 for (uint32_t n = 32; n <= 48; n += 16) {
19506 for (size_t k = 1; k <= 40; k += 9) {
19507 GemmMicrokernelTester()
19508 .mr(1)
19509 .nr(16)
19510 .kr(8)
19511 .sr(1)
19512 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019513 .n(n)
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019514 .k(k)
19515 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080019516 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019517 }
19518 }
19519 }
19520
19521 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, strided_cm_subtile) {
19522 TEST_REQUIRES_X86_AVX512SKX;
19523 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019524 for (uint32_t n = 1; n <= 16; n++) {
19525 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019526 GemmMicrokernelTester()
19527 .mr(1)
19528 .nr(16)
19529 .kr(8)
19530 .sr(1)
19531 .m(m)
19532 .n(n)
19533 .k(k)
19534 .cm_stride(19)
19535 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019536 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019537 }
19538 }
19539 }
19540 }
19541
19542 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, a_offset) {
19543 TEST_REQUIRES_X86_AVX512SKX;
19544 for (size_t k = 1; k <= 40; k += 9) {
19545 GemmMicrokernelTester()
19546 .mr(1)
19547 .nr(16)
19548 .kr(8)
19549 .sr(1)
19550 .m(1)
19551 .n(16)
19552 .k(k)
19553 .ks(3)
19554 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080019555 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019556 }
19557 }
19558
19559 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, zero) {
19560 TEST_REQUIRES_X86_AVX512SKX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019561 for (size_t k = 1; k <= 40; k += 9) {
19562 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019563 GemmMicrokernelTester()
19564 .mr(1)
19565 .nr(16)
19566 .kr(8)
19567 .sr(1)
19568 .m(1)
19569 .n(16)
19570 .k(k)
19571 .ks(3)
19572 .a_offset(43)
19573 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080019574 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019575 }
19576 }
19577 }
19578
19579 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, qmin) {
19580 TEST_REQUIRES_X86_AVX512SKX;
19581 GemmMicrokernelTester()
19582 .mr(1)
19583 .nr(16)
19584 .kr(8)
19585 .sr(1)
19586 .m(1)
19587 .n(16)
19588 .k(8)
19589 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019590 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019591 }
19592
19593 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, qmax) {
19594 TEST_REQUIRES_X86_AVX512SKX;
19595 GemmMicrokernelTester()
19596 .mr(1)
19597 .nr(16)
19598 .kr(8)
19599 .sr(1)
19600 .m(1)
19601 .n(16)
19602 .k(8)
19603 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019604 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019605 }
19606
19607 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, strided_cm) {
19608 TEST_REQUIRES_X86_AVX512SKX;
19609 GemmMicrokernelTester()
19610 .mr(1)
19611 .nr(16)
19612 .kr(8)
19613 .sr(1)
19614 .m(1)
19615 .n(16)
19616 .k(8)
19617 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019618 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019619 }
19620
19621 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, no_a_zero_point) {
19622 TEST_REQUIRES_X86_AVX512SKX;
19623 for (size_t k = 1; k <= 40; k += 9) {
19624 GemmMicrokernelTester()
19625 .mr(1)
19626 .nr(16)
19627 .kr(8)
19628 .sr(1)
19629 .m(1)
19630 .n(16)
19631 .k(k)
19632 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080019633 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019634 }
19635 }
19636
19637 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, no_b_zero_point) {
19638 TEST_REQUIRES_X86_AVX512SKX;
19639 for (size_t k = 1; k <= 40; k += 9) {
19640 GemmMicrokernelTester()
19641 .mr(1)
19642 .nr(16)
19643 .kr(8)
19644 .sr(1)
19645 .m(1)
19646 .n(16)
19647 .k(k)
19648 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080019649 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019650 }
19651 }
19652
19653 TEST(QU8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, no_zero_point) {
19654 TEST_REQUIRES_X86_AVX512SKX;
19655 for (size_t k = 1; k <= 40; k += 9) {
19656 GemmMicrokernelTester()
19657 .mr(1)
19658 .nr(16)
19659 .kr(8)
19660 .sr(1)
19661 .m(1)
19662 .n(16)
19663 .k(k)
19664 .a_zero_point(0)
19665 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080019666 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019667 }
19668 }
19669#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19670
19671
19672#if XNN_ARCH_X86 || XNN_ARCH_X86_64
19673 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8) {
19674 TEST_REQUIRES_X86_AVX512SKX;
19675 GemmMicrokernelTester()
19676 .mr(2)
19677 .nr(16)
19678 .kr(8)
19679 .sr(1)
19680 .m(2)
19681 .n(16)
19682 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080019683 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019684 }
19685
19686 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cn) {
19687 TEST_REQUIRES_X86_AVX512SKX;
19688 GemmMicrokernelTester()
19689 .mr(2)
19690 .nr(16)
19691 .kr(8)
19692 .sr(1)
19693 .m(2)
19694 .n(16)
19695 .k(8)
19696 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019697 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019698 }
19699
19700 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile) {
19701 TEST_REQUIRES_X86_AVX512SKX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019702 for (uint32_t n = 1; n <= 16; n++) {
19703 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019704 GemmMicrokernelTester()
19705 .mr(2)
19706 .nr(16)
19707 .kr(8)
19708 .sr(1)
19709 .m(m)
19710 .n(n)
19711 .k(8)
19712 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019713 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019714 }
19715 }
19716 }
19717
19718 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile_m) {
19719 TEST_REQUIRES_X86_AVX512SKX;
19720 for (uint32_t m = 1; m <= 2; m++) {
19721 GemmMicrokernelTester()
19722 .mr(2)
19723 .nr(16)
19724 .kr(8)
19725 .sr(1)
19726 .m(m)
19727 .n(16)
19728 .k(8)
19729 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019730 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019731 }
19732 }
19733
19734 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile_n) {
19735 TEST_REQUIRES_X86_AVX512SKX;
19736 for (uint32_t n = 1; n <= 16; n++) {
19737 GemmMicrokernelTester()
19738 .mr(2)
19739 .nr(16)
19740 .kr(8)
19741 .sr(1)
19742 .m(2)
19743 .n(n)
19744 .k(8)
19745 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019746 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019747 }
19748 }
19749
19750 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_lt_8) {
19751 TEST_REQUIRES_X86_AVX512SKX;
19752 for (size_t k = 1; k < 8; k++) {
19753 GemmMicrokernelTester()
19754 .mr(2)
19755 .nr(16)
19756 .kr(8)
19757 .sr(1)
19758 .m(2)
19759 .n(16)
19760 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019761 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019762 }
19763 }
19764
19765 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_lt_8_subtile) {
19766 TEST_REQUIRES_X86_AVX512SKX;
19767 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019768 for (uint32_t n = 1; n <= 16; n++) {
19769 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019770 GemmMicrokernelTester()
19771 .mr(2)
19772 .nr(16)
19773 .kr(8)
19774 .sr(1)
19775 .m(m)
19776 .n(n)
19777 .k(k)
19778 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019779 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019780 }
19781 }
19782 }
19783 }
19784
19785 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_gt_8) {
19786 TEST_REQUIRES_X86_AVX512SKX;
19787 for (size_t k = 9; k < 16; k++) {
19788 GemmMicrokernelTester()
19789 .mr(2)
19790 .nr(16)
19791 .kr(8)
19792 .sr(1)
19793 .m(2)
19794 .n(16)
19795 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019796 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019797 }
19798 }
19799
19800 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_gt_8_subtile) {
19801 TEST_REQUIRES_X86_AVX512SKX;
19802 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019803 for (uint32_t n = 1; n <= 16; n++) {
19804 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019805 GemmMicrokernelTester()
19806 .mr(2)
19807 .nr(16)
19808 .kr(8)
19809 .sr(1)
19810 .m(m)
19811 .n(n)
19812 .k(k)
19813 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019814 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019815 }
19816 }
19817 }
19818 }
19819
19820 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_div_8) {
19821 TEST_REQUIRES_X86_AVX512SKX;
19822 for (size_t k = 16; k <= 80; k += 8) {
19823 GemmMicrokernelTester()
19824 .mr(2)
19825 .nr(16)
19826 .kr(8)
19827 .sr(1)
19828 .m(2)
19829 .n(16)
19830 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019831 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019832 }
19833 }
19834
19835 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_div_8_subtile) {
19836 TEST_REQUIRES_X86_AVX512SKX;
19837 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019838 for (uint32_t n = 1; n <= 16; n++) {
19839 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019840 GemmMicrokernelTester()
19841 .mr(2)
19842 .nr(16)
19843 .kr(8)
19844 .sr(1)
19845 .m(m)
19846 .n(n)
19847 .k(k)
19848 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019849 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019850 }
19851 }
19852 }
19853 }
19854
19855 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16) {
19856 TEST_REQUIRES_X86_AVX512SKX;
19857 for (uint32_t n = 17; n < 32; n++) {
19858 for (size_t k = 1; k <= 40; k += 9) {
19859 GemmMicrokernelTester()
19860 .mr(2)
19861 .nr(16)
19862 .kr(8)
19863 .sr(1)
19864 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019865 .n(n)
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019866 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019867 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019868 }
19869 }
19870 }
19871
19872 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_strided_cn) {
19873 TEST_REQUIRES_X86_AVX512SKX;
19874 for (uint32_t n = 17; n < 32; n++) {
19875 for (size_t k = 1; k <= 40; k += 9) {
19876 GemmMicrokernelTester()
19877 .mr(2)
19878 .nr(16)
19879 .kr(8)
19880 .sr(1)
19881 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019882 .n(n)
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019883 .k(k)
19884 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019885 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019886 }
19887 }
19888 }
19889
19890 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_subtile) {
19891 TEST_REQUIRES_X86_AVX512SKX;
19892 for (uint32_t n = 17; n < 32; n++) {
19893 for (size_t k = 1; k <= 40; k += 9) {
19894 for (uint32_t m = 1; m <= 2; m++) {
19895 GemmMicrokernelTester()
19896 .mr(2)
19897 .nr(16)
19898 .kr(8)
19899 .sr(1)
19900 .m(m)
19901 .n(n)
19902 .k(k)
19903 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019904 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019905 }
19906 }
19907 }
19908 }
19909
19910 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16) {
19911 TEST_REQUIRES_X86_AVX512SKX;
19912 for (uint32_t n = 32; n <= 48; n += 16) {
19913 for (size_t k = 1; k <= 40; k += 9) {
19914 GemmMicrokernelTester()
19915 .mr(2)
19916 .nr(16)
19917 .kr(8)
19918 .sr(1)
19919 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019920 .n(n)
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019921 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019922 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019923 }
19924 }
19925 }
19926
19927 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_strided_cn) {
19928 TEST_REQUIRES_X86_AVX512SKX;
19929 for (uint32_t n = 32; n <= 48; n += 16) {
19930 for (size_t k = 1; k <= 40; k += 9) {
19931 GemmMicrokernelTester()
19932 .mr(2)
19933 .nr(16)
19934 .kr(8)
19935 .sr(1)
19936 .m(2)
19937 .n(n)
19938 .k(k)
19939 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019940 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019941 }
19942 }
19943 }
19944
19945 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_subtile) {
19946 TEST_REQUIRES_X86_AVX512SKX;
19947 for (uint32_t n = 32; n <= 48; n += 16) {
19948 for (size_t k = 1; k <= 40; k += 9) {
19949 for (uint32_t m = 1; m <= 2; m++) {
19950 GemmMicrokernelTester()
19951 .mr(2)
19952 .nr(16)
19953 .kr(8)
19954 .sr(1)
19955 .m(m)
19956 .n(n)
19957 .k(k)
19958 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019959 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019960 }
19961 }
19962 }
19963 }
19964
19965 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, small_kernel) {
19966 TEST_REQUIRES_X86_AVX512SKX;
19967 for (size_t k = 1; k <= 40; k += 9) {
19968 GemmMicrokernelTester()
19969 .mr(2)
19970 .nr(16)
19971 .kr(8)
19972 .sr(1)
19973 .m(2)
19974 .n(16)
19975 .k(k)
19976 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080019977 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019978 }
19979 }
19980
19981 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, small_kernel_subtile) {
19982 TEST_REQUIRES_X86_AVX512SKX;
19983 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019984 for (uint32_t n = 1; n <= 16; n++) {
19985 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019986 GemmMicrokernelTester()
19987 .mr(2)
19988 .nr(16)
19989 .kr(8)
19990 .sr(1)
19991 .m(m)
19992 .n(n)
19993 .k(k)
19994 .ks(3)
19995 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019996 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070019997 }
19998 }
19999 }
20000 }
20001
20002 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_small_kernel) {
20003 TEST_REQUIRES_X86_AVX512SKX;
20004 for (uint32_t n = 17; n < 32; n++) {
20005 for (size_t k = 1; k <= 40; k += 9) {
20006 GemmMicrokernelTester()
20007 .mr(2)
20008 .nr(16)
20009 .kr(8)
20010 .sr(1)
20011 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020012 .n(n)
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020013 .k(k)
20014 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080020015 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020016 }
20017 }
20018 }
20019
20020 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_small_kernel) {
20021 TEST_REQUIRES_X86_AVX512SKX;
20022 for (uint32_t n = 32; n <= 48; n += 16) {
20023 for (size_t k = 1; k <= 40; k += 9) {
20024 GemmMicrokernelTester()
20025 .mr(2)
20026 .nr(16)
20027 .kr(8)
20028 .sr(1)
20029 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020030 .n(n)
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020031 .k(k)
20032 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080020033 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020034 }
20035 }
20036 }
20037
20038 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cm_subtile) {
20039 TEST_REQUIRES_X86_AVX512SKX;
20040 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020041 for (uint32_t n = 1; n <= 16; n++) {
20042 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020043 GemmMicrokernelTester()
20044 .mr(2)
20045 .nr(16)
20046 .kr(8)
20047 .sr(1)
20048 .m(m)
20049 .n(n)
20050 .k(k)
20051 .cm_stride(19)
20052 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020053 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020054 }
20055 }
20056 }
20057 }
20058
20059 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, a_offset) {
20060 TEST_REQUIRES_X86_AVX512SKX;
20061 for (size_t k = 1; k <= 40; k += 9) {
20062 GemmMicrokernelTester()
20063 .mr(2)
20064 .nr(16)
20065 .kr(8)
20066 .sr(1)
20067 .m(2)
20068 .n(16)
20069 .k(k)
20070 .ks(3)
20071 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080020072 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020073 }
20074 }
20075
20076 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, zero) {
20077 TEST_REQUIRES_X86_AVX512SKX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020078 for (size_t k = 1; k <= 40; k += 9) {
20079 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020080 GemmMicrokernelTester()
20081 .mr(2)
20082 .nr(16)
20083 .kr(8)
20084 .sr(1)
20085 .m(2)
20086 .n(16)
20087 .k(k)
20088 .ks(3)
20089 .a_offset(83)
20090 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080020091 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020092 }
20093 }
20094 }
20095
20096 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, qmin) {
20097 TEST_REQUIRES_X86_AVX512SKX;
20098 GemmMicrokernelTester()
20099 .mr(2)
20100 .nr(16)
20101 .kr(8)
20102 .sr(1)
20103 .m(2)
20104 .n(16)
20105 .k(8)
20106 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020107 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020108 }
20109
20110 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, qmax) {
20111 TEST_REQUIRES_X86_AVX512SKX;
20112 GemmMicrokernelTester()
20113 .mr(2)
20114 .nr(16)
20115 .kr(8)
20116 .sr(1)
20117 .m(2)
20118 .n(16)
20119 .k(8)
20120 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020121 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020122 }
20123
20124 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cm) {
20125 TEST_REQUIRES_X86_AVX512SKX;
20126 GemmMicrokernelTester()
20127 .mr(2)
20128 .nr(16)
20129 .kr(8)
20130 .sr(1)
20131 .m(2)
20132 .n(16)
20133 .k(8)
20134 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080020135 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020136 }
20137
20138 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, no_a_zero_point) {
20139 TEST_REQUIRES_X86_AVX512SKX;
20140 for (size_t k = 1; k <= 40; k += 9) {
20141 GemmMicrokernelTester()
20142 .mr(2)
20143 .nr(16)
20144 .kr(8)
20145 .sr(1)
20146 .m(2)
20147 .n(16)
20148 .k(k)
20149 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080020150 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020151 }
20152 }
20153
20154 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, no_b_zero_point) {
20155 TEST_REQUIRES_X86_AVX512SKX;
20156 for (size_t k = 1; k <= 40; k += 9) {
20157 GemmMicrokernelTester()
20158 .mr(2)
20159 .nr(16)
20160 .kr(8)
20161 .sr(1)
20162 .m(2)
20163 .n(16)
20164 .k(k)
20165 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080020166 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020167 }
20168 }
20169
20170 TEST(QU8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, no_zero_point) {
20171 TEST_REQUIRES_X86_AVX512SKX;
20172 for (size_t k = 1; k <= 40; k += 9) {
20173 GemmMicrokernelTester()
20174 .mr(2)
20175 .nr(16)
20176 .kr(8)
20177 .sr(1)
20178 .m(2)
20179 .n(16)
20180 .k(k)
20181 .a_zero_point(0)
20182 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080020183 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020184 }
20185 }
20186#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20187
20188
20189#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020190 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8) {
20191 TEST_REQUIRES_X86_AVX512SKX;
20192 GemmMicrokernelTester()
20193 .mr(4)
20194 .nr(16)
20195 .kr(8)
20196 .sr(1)
20197 .m(4)
20198 .n(16)
20199 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080020200 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020201 }
20202
20203 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, strided_cn) {
20204 TEST_REQUIRES_X86_AVX512SKX;
20205 GemmMicrokernelTester()
20206 .mr(4)
20207 .nr(16)
20208 .kr(8)
20209 .sr(1)
20210 .m(4)
20211 .n(16)
20212 .k(8)
20213 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080020214 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020215 }
20216
20217 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_subtile) {
20218 TEST_REQUIRES_X86_AVX512SKX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020219 for (uint32_t n = 1; n <= 16; n++) {
20220 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020221 GemmMicrokernelTester()
20222 .mr(4)
20223 .nr(16)
20224 .kr(8)
20225 .sr(1)
20226 .m(m)
20227 .n(n)
20228 .k(8)
20229 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020230 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020231 }
20232 }
20233 }
20234
20235 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_subtile_m) {
20236 TEST_REQUIRES_X86_AVX512SKX;
20237 for (uint32_t m = 1; m <= 4; m++) {
20238 GemmMicrokernelTester()
20239 .mr(4)
20240 .nr(16)
20241 .kr(8)
20242 .sr(1)
20243 .m(m)
20244 .n(16)
20245 .k(8)
20246 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020247 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020248 }
20249 }
20250
20251 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_subtile_n) {
20252 TEST_REQUIRES_X86_AVX512SKX;
20253 for (uint32_t n = 1; n <= 16; n++) {
20254 GemmMicrokernelTester()
20255 .mr(4)
20256 .nr(16)
20257 .kr(8)
20258 .sr(1)
20259 .m(4)
20260 .n(n)
20261 .k(8)
20262 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020263 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020264 }
20265 }
20266
20267 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_lt_8) {
20268 TEST_REQUIRES_X86_AVX512SKX;
20269 for (size_t k = 1; k < 8; k++) {
20270 GemmMicrokernelTester()
20271 .mr(4)
20272 .nr(16)
20273 .kr(8)
20274 .sr(1)
20275 .m(4)
20276 .n(16)
20277 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020278 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020279 }
20280 }
20281
20282 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_lt_8_subtile) {
20283 TEST_REQUIRES_X86_AVX512SKX;
20284 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020285 for (uint32_t n = 1; n <= 16; n++) {
20286 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020287 GemmMicrokernelTester()
20288 .mr(4)
20289 .nr(16)
20290 .kr(8)
20291 .sr(1)
20292 .m(m)
20293 .n(n)
20294 .k(k)
20295 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020296 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020297 }
20298 }
20299 }
20300 }
20301
20302 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_gt_8) {
20303 TEST_REQUIRES_X86_AVX512SKX;
20304 for (size_t k = 9; k < 16; k++) {
20305 GemmMicrokernelTester()
20306 .mr(4)
20307 .nr(16)
20308 .kr(8)
20309 .sr(1)
20310 .m(4)
20311 .n(16)
20312 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020313 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020314 }
20315 }
20316
20317 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_gt_8_subtile) {
20318 TEST_REQUIRES_X86_AVX512SKX;
20319 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020320 for (uint32_t n = 1; n <= 16; n++) {
20321 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020322 GemmMicrokernelTester()
20323 .mr(4)
20324 .nr(16)
20325 .kr(8)
20326 .sr(1)
20327 .m(m)
20328 .n(n)
20329 .k(k)
20330 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020331 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020332 }
20333 }
20334 }
20335 }
20336
20337 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_div_8) {
20338 TEST_REQUIRES_X86_AVX512SKX;
20339 for (size_t k = 16; k <= 80; k += 8) {
20340 GemmMicrokernelTester()
20341 .mr(4)
20342 .nr(16)
20343 .kr(8)
20344 .sr(1)
20345 .m(4)
20346 .n(16)
20347 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020348 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020349 }
20350 }
20351
20352 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_div_8_subtile) {
20353 TEST_REQUIRES_X86_AVX512SKX;
20354 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020355 for (uint32_t n = 1; n <= 16; n++) {
20356 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020357 GemmMicrokernelTester()
20358 .mr(4)
20359 .nr(16)
20360 .kr(8)
20361 .sr(1)
20362 .m(m)
20363 .n(n)
20364 .k(k)
20365 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020366 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020367 }
20368 }
20369 }
20370 }
20371
20372 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16) {
20373 TEST_REQUIRES_X86_AVX512SKX;
20374 for (uint32_t n = 17; n < 32; n++) {
20375 for (size_t k = 1; k <= 40; k += 9) {
20376 GemmMicrokernelTester()
20377 .mr(4)
20378 .nr(16)
20379 .kr(8)
20380 .sr(1)
20381 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020382 .n(n)
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020383 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020384 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020385 }
20386 }
20387 }
20388
20389 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16_strided_cn) {
20390 TEST_REQUIRES_X86_AVX512SKX;
20391 for (uint32_t n = 17; n < 32; n++) {
20392 for (size_t k = 1; k <= 40; k += 9) {
20393 GemmMicrokernelTester()
20394 .mr(4)
20395 .nr(16)
20396 .kr(8)
20397 .sr(1)
20398 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020399 .n(n)
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020400 .k(k)
20401 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080020402 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020403 }
20404 }
20405 }
20406
20407 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16_subtile) {
20408 TEST_REQUIRES_X86_AVX512SKX;
20409 for (uint32_t n = 17; n < 32; n++) {
20410 for (size_t k = 1; k <= 40; k += 9) {
20411 for (uint32_t m = 1; m <= 4; m++) {
20412 GemmMicrokernelTester()
20413 .mr(4)
20414 .nr(16)
20415 .kr(8)
20416 .sr(1)
20417 .m(m)
20418 .n(n)
20419 .k(k)
20420 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020421 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020422 }
20423 }
20424 }
20425 }
20426
20427 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16) {
20428 TEST_REQUIRES_X86_AVX512SKX;
20429 for (uint32_t n = 32; n <= 48; n += 16) {
20430 for (size_t k = 1; k <= 40; k += 9) {
20431 GemmMicrokernelTester()
20432 .mr(4)
20433 .nr(16)
20434 .kr(8)
20435 .sr(1)
20436 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020437 .n(n)
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020438 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020439 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020440 }
20441 }
20442 }
20443
20444 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16_strided_cn) {
20445 TEST_REQUIRES_X86_AVX512SKX;
20446 for (uint32_t n = 32; n <= 48; n += 16) {
20447 for (size_t k = 1; k <= 40; k += 9) {
20448 GemmMicrokernelTester()
20449 .mr(4)
20450 .nr(16)
20451 .kr(8)
20452 .sr(1)
20453 .m(4)
20454 .n(n)
20455 .k(k)
20456 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080020457 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020458 }
20459 }
20460 }
20461
20462 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16_subtile) {
20463 TEST_REQUIRES_X86_AVX512SKX;
20464 for (uint32_t n = 32; n <= 48; n += 16) {
20465 for (size_t k = 1; k <= 40; k += 9) {
20466 for (uint32_t m = 1; m <= 4; m++) {
20467 GemmMicrokernelTester()
20468 .mr(4)
20469 .nr(16)
20470 .kr(8)
20471 .sr(1)
20472 .m(m)
20473 .n(n)
20474 .k(k)
20475 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020476 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020477 }
20478 }
20479 }
20480 }
20481
20482 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, small_kernel) {
20483 TEST_REQUIRES_X86_AVX512SKX;
20484 for (size_t k = 1; k <= 40; k += 9) {
20485 GemmMicrokernelTester()
20486 .mr(4)
20487 .nr(16)
20488 .kr(8)
20489 .sr(1)
20490 .m(4)
20491 .n(16)
20492 .k(k)
20493 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080020494 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020495 }
20496 }
20497
20498 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, small_kernel_subtile) {
20499 TEST_REQUIRES_X86_AVX512SKX;
20500 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020501 for (uint32_t n = 1; n <= 16; n++) {
20502 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020503 GemmMicrokernelTester()
20504 .mr(4)
20505 .nr(16)
20506 .kr(8)
20507 .sr(1)
20508 .m(m)
20509 .n(n)
20510 .k(k)
20511 .ks(3)
20512 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020513 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020514 }
20515 }
20516 }
20517 }
20518
20519 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16_small_kernel) {
20520 TEST_REQUIRES_X86_AVX512SKX;
20521 for (uint32_t n = 17; n < 32; n++) {
20522 for (size_t k = 1; k <= 40; k += 9) {
20523 GemmMicrokernelTester()
20524 .mr(4)
20525 .nr(16)
20526 .kr(8)
20527 .sr(1)
20528 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020529 .n(n)
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020530 .k(k)
20531 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080020532 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020533 }
20534 }
20535 }
20536
20537 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16_small_kernel) {
20538 TEST_REQUIRES_X86_AVX512SKX;
20539 for (uint32_t n = 32; n <= 48; n += 16) {
20540 for (size_t k = 1; k <= 40; k += 9) {
20541 GemmMicrokernelTester()
20542 .mr(4)
20543 .nr(16)
20544 .kr(8)
20545 .sr(1)
20546 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020547 .n(n)
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020548 .k(k)
20549 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080020550 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020551 }
20552 }
20553 }
20554
20555 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, strided_cm_subtile) {
20556 TEST_REQUIRES_X86_AVX512SKX;
20557 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020558 for (uint32_t n = 1; n <= 16; n++) {
20559 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020560 GemmMicrokernelTester()
20561 .mr(4)
20562 .nr(16)
20563 .kr(8)
20564 .sr(1)
20565 .m(m)
20566 .n(n)
20567 .k(k)
20568 .cm_stride(19)
20569 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020570 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020571 }
20572 }
20573 }
20574 }
20575
20576 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, a_offset) {
20577 TEST_REQUIRES_X86_AVX512SKX;
20578 for (size_t k = 1; k <= 40; k += 9) {
20579 GemmMicrokernelTester()
20580 .mr(4)
20581 .nr(16)
20582 .kr(8)
20583 .sr(1)
20584 .m(4)
20585 .n(16)
20586 .k(k)
20587 .ks(3)
20588 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080020589 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020590 }
20591 }
20592
20593 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, zero) {
20594 TEST_REQUIRES_X86_AVX512SKX;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020595 for (size_t k = 1; k <= 40; k += 9) {
20596 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020597 GemmMicrokernelTester()
20598 .mr(4)
20599 .nr(16)
20600 .kr(8)
20601 .sr(1)
20602 .m(4)
20603 .n(16)
20604 .k(k)
20605 .ks(3)
20606 .a_offset(163)
20607 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080020608 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020609 }
20610 }
20611 }
20612
20613 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, qmin) {
20614 TEST_REQUIRES_X86_AVX512SKX;
20615 GemmMicrokernelTester()
20616 .mr(4)
20617 .nr(16)
20618 .kr(8)
20619 .sr(1)
20620 .m(4)
20621 .n(16)
20622 .k(8)
20623 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020624 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020625 }
20626
20627 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, qmax) {
20628 TEST_REQUIRES_X86_AVX512SKX;
20629 GemmMicrokernelTester()
20630 .mr(4)
20631 .nr(16)
20632 .kr(8)
20633 .sr(1)
20634 .m(4)
20635 .n(16)
20636 .k(8)
20637 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020638 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020639 }
20640
20641 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, strided_cm) {
20642 TEST_REQUIRES_X86_AVX512SKX;
20643 GemmMicrokernelTester()
20644 .mr(4)
20645 .nr(16)
20646 .kr(8)
20647 .sr(1)
20648 .m(4)
20649 .n(16)
20650 .k(8)
20651 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080020652 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020653 }
20654
20655 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, no_a_zero_point) {
20656 TEST_REQUIRES_X86_AVX512SKX;
20657 for (size_t k = 1; k <= 40; k += 9) {
20658 GemmMicrokernelTester()
20659 .mr(4)
20660 .nr(16)
20661 .kr(8)
20662 .sr(1)
20663 .m(4)
20664 .n(16)
20665 .k(k)
20666 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080020667 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020668 }
20669 }
20670
20671 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, no_b_zero_point) {
20672 TEST_REQUIRES_X86_AVX512SKX;
20673 for (size_t k = 1; k <= 40; k += 9) {
20674 GemmMicrokernelTester()
20675 .mr(4)
20676 .nr(16)
20677 .kr(8)
20678 .sr(1)
20679 .m(4)
20680 .n(16)
20681 .k(k)
20682 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080020683 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020684 }
20685 }
20686
20687 TEST(QU8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, no_zero_point) {
20688 TEST_REQUIRES_X86_AVX512SKX;
20689 for (size_t k = 1; k <= 40; k += 9) {
20690 GemmMicrokernelTester()
20691 .mr(4)
20692 .nr(16)
20693 .kr(8)
20694 .sr(1)
20695 .m(4)
20696 .n(16)
20697 .k(k)
20698 .a_zero_point(0)
20699 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080020700 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qu8_conv_minmax_fp32_avx512_params, xnn_qu8_requantize_fp32);
Marat Dukhan3cf2e222021-07-08 11:38:45 -070020701 }
20702 }
20703#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan43bee052021-07-14 20:57:18 -070020704
20705
Marat Dukhan4c617792021-12-21 15:47:58 -080020706#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070020707 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
20708 GemmMicrokernelTester()
20709 .mr(1)
20710 .nr(4)
20711 .kr(2)
20712 .sr(1)
20713 .m(1)
20714 .n(4)
20715 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080020716 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070020717 }
20718
20719 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
20720 GemmMicrokernelTester()
20721 .mr(1)
20722 .nr(4)
20723 .kr(2)
20724 .sr(1)
20725 .m(1)
20726 .n(4)
20727 .k(8)
20728 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080020729 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070020730 }
20731
20732 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020733 for (uint32_t n = 1; n <= 4; n++) {
20734 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070020735 GemmMicrokernelTester()
20736 .mr(1)
20737 .nr(4)
20738 .kr(2)
20739 .sr(1)
20740 .m(m)
20741 .n(n)
20742 .k(8)
20743 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020744 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070020745 }
20746 }
20747 }
20748
20749 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
20750 for (uint32_t m = 1; m <= 1; m++) {
20751 GemmMicrokernelTester()
20752 .mr(1)
20753 .nr(4)
20754 .kr(2)
20755 .sr(1)
20756 .m(m)
20757 .n(4)
20758 .k(8)
20759 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020760 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070020761 }
20762 }
20763
20764 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
20765 for (uint32_t n = 1; n <= 4; n++) {
20766 GemmMicrokernelTester()
20767 .mr(1)
20768 .nr(4)
20769 .kr(2)
20770 .sr(1)
20771 .m(1)
20772 .n(n)
20773 .k(8)
20774 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020775 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070020776 }
20777 }
20778
20779 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
20780 for (size_t k = 1; k < 8; k++) {
20781 GemmMicrokernelTester()
20782 .mr(1)
20783 .nr(4)
20784 .kr(2)
20785 .sr(1)
20786 .m(1)
20787 .n(4)
20788 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020789 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070020790 }
20791 }
20792
20793 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
20794 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020795 for (uint32_t n = 1; n <= 4; n++) {
20796 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070020797 GemmMicrokernelTester()
20798 .mr(1)
20799 .nr(4)
20800 .kr(2)
20801 .sr(1)
20802 .m(m)
20803 .n(n)
20804 .k(k)
20805 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020806 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070020807 }
20808 }
20809 }
20810 }
20811
20812 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
20813 for (size_t k = 9; k < 16; k++) {
20814 GemmMicrokernelTester()
20815 .mr(1)
20816 .nr(4)
20817 .kr(2)
20818 .sr(1)
20819 .m(1)
20820 .n(4)
20821 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020822 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070020823 }
20824 }
20825
20826 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
20827 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020828 for (uint32_t n = 1; n <= 4; n++) {
20829 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070020830 GemmMicrokernelTester()
20831 .mr(1)
20832 .nr(4)
20833 .kr(2)
20834 .sr(1)
20835 .m(m)
20836 .n(n)
20837 .k(k)
20838 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020839 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070020840 }
20841 }
20842 }
20843 }
20844
20845 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
20846 for (size_t k = 16; k <= 80; k += 8) {
20847 GemmMicrokernelTester()
20848 .mr(1)
20849 .nr(4)
20850 .kr(2)
20851 .sr(1)
20852 .m(1)
20853 .n(4)
20854 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020855 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070020856 }
20857 }
20858
20859 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
20860 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020861 for (uint32_t n = 1; n <= 4; n++) {
20862 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070020863 GemmMicrokernelTester()
20864 .mr(1)
20865 .nr(4)
20866 .kr(2)
20867 .sr(1)
20868 .m(m)
20869 .n(n)
20870 .k(k)
20871 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020872 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070020873 }
20874 }
20875 }
20876 }
20877
20878 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
20879 for (uint32_t n = 5; n < 8; n++) {
20880 for (size_t k = 1; k <= 40; k += 9) {
20881 GemmMicrokernelTester()
20882 .mr(1)
20883 .nr(4)
20884 .kr(2)
20885 .sr(1)
20886 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020887 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070020888 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020889 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070020890 }
20891 }
20892 }
20893
20894 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
20895 for (uint32_t n = 5; n < 8; n++) {
20896 for (size_t k = 1; k <= 40; k += 9) {
20897 GemmMicrokernelTester()
20898 .mr(1)
20899 .nr(4)
20900 .kr(2)
20901 .sr(1)
20902 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020903 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070020904 .k(k)
20905 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080020906 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070020907 }
20908 }
20909 }
20910
20911 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
20912 for (uint32_t n = 5; n < 8; n++) {
20913 for (size_t k = 1; k <= 40; k += 9) {
20914 for (uint32_t m = 1; m <= 1; m++) {
20915 GemmMicrokernelTester()
20916 .mr(1)
20917 .nr(4)
20918 .kr(2)
20919 .sr(1)
20920 .m(m)
20921 .n(n)
20922 .k(k)
20923 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020924 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070020925 }
20926 }
20927 }
20928 }
20929
20930 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
20931 for (uint32_t n = 8; n <= 12; n += 4) {
20932 for (size_t k = 1; k <= 40; k += 9) {
20933 GemmMicrokernelTester()
20934 .mr(1)
20935 .nr(4)
20936 .kr(2)
20937 .sr(1)
20938 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020939 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070020940 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020941 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070020942 }
20943 }
20944 }
20945
20946 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
20947 for (uint32_t n = 8; n <= 12; n += 4) {
20948 for (size_t k = 1; k <= 40; k += 9) {
20949 GemmMicrokernelTester()
20950 .mr(1)
20951 .nr(4)
20952 .kr(2)
20953 .sr(1)
20954 .m(1)
20955 .n(n)
20956 .k(k)
20957 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080020958 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070020959 }
20960 }
20961 }
20962
20963 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
20964 for (uint32_t n = 8; n <= 12; n += 4) {
20965 for (size_t k = 1; k <= 40; k += 9) {
20966 for (uint32_t m = 1; m <= 1; m++) {
20967 GemmMicrokernelTester()
20968 .mr(1)
20969 .nr(4)
20970 .kr(2)
20971 .sr(1)
20972 .m(m)
20973 .n(n)
20974 .k(k)
20975 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020976 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070020977 }
20978 }
20979 }
20980 }
20981
20982 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, small_kernel) {
20983 for (size_t k = 1; k <= 40; k += 9) {
20984 GemmMicrokernelTester()
20985 .mr(1)
20986 .nr(4)
20987 .kr(2)
20988 .sr(1)
20989 .m(1)
20990 .n(4)
20991 .k(k)
20992 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080020993 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070020994 }
20995 }
20996
20997 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
20998 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020999 for (uint32_t n = 1; n <= 4; n++) {
21000 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021001 GemmMicrokernelTester()
21002 .mr(1)
21003 .nr(4)
21004 .kr(2)
21005 .sr(1)
21006 .m(m)
21007 .n(n)
21008 .k(k)
21009 .ks(3)
21010 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021011 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021012 }
21013 }
21014 }
21015 }
21016
21017 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
21018 for (uint32_t n = 5; n < 8; n++) {
21019 for (size_t k = 1; k <= 40; k += 9) {
21020 GemmMicrokernelTester()
21021 .mr(1)
21022 .nr(4)
21023 .kr(2)
21024 .sr(1)
21025 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021026 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021027 .k(k)
21028 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080021029 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021030 }
21031 }
21032 }
21033
21034 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
21035 for (uint32_t n = 8; n <= 12; n += 4) {
21036 for (size_t k = 1; k <= 40; k += 9) {
21037 GemmMicrokernelTester()
21038 .mr(1)
21039 .nr(4)
21040 .kr(2)
21041 .sr(1)
21042 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021043 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021044 .k(k)
21045 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080021046 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021047 }
21048 }
21049 }
21050
21051 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
21052 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021053 for (uint32_t n = 1; n <= 4; n++) {
21054 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021055 GemmMicrokernelTester()
21056 .mr(1)
21057 .nr(4)
21058 .kr(2)
21059 .sr(1)
21060 .m(m)
21061 .n(n)
21062 .k(k)
21063 .cm_stride(7)
21064 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021065 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021066 }
21067 }
21068 }
21069 }
21070
21071 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, a_offset) {
21072 for (size_t k = 1; k <= 40; k += 9) {
21073 GemmMicrokernelTester()
21074 .mr(1)
21075 .nr(4)
21076 .kr(2)
21077 .sr(1)
21078 .m(1)
21079 .n(4)
21080 .k(k)
21081 .ks(3)
21082 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080021083 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021084 }
21085 }
21086
21087 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021088 for (size_t k = 1; k <= 40; k += 9) {
21089 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021090 GemmMicrokernelTester()
21091 .mr(1)
21092 .nr(4)
21093 .kr(2)
21094 .sr(1)
21095 .m(1)
21096 .n(4)
21097 .k(k)
21098 .ks(3)
21099 .a_offset(43)
21100 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080021101 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021102 }
21103 }
21104 }
21105
21106 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
21107 GemmMicrokernelTester()
21108 .mr(1)
21109 .nr(4)
21110 .kr(2)
21111 .sr(1)
21112 .m(1)
21113 .n(4)
21114 .k(8)
21115 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021116 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021117 }
21118
21119 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
21120 GemmMicrokernelTester()
21121 .mr(1)
21122 .nr(4)
21123 .kr(2)
21124 .sr(1)
21125 .m(1)
21126 .n(4)
21127 .k(8)
21128 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021129 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021130 }
21131
21132 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
21133 GemmMicrokernelTester()
21134 .mr(1)
21135 .nr(4)
21136 .kr(2)
21137 .sr(1)
21138 .m(1)
21139 .n(4)
21140 .k(8)
21141 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021142 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021143 }
21144
21145 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
21146 for (size_t k = 1; k <= 40; k += 9) {
21147 GemmMicrokernelTester()
21148 .mr(1)
21149 .nr(4)
21150 .kr(2)
21151 .sr(1)
21152 .m(1)
21153 .n(4)
21154 .k(k)
21155 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080021156 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021157 }
21158 }
21159
21160 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
21161 for (size_t k = 1; k <= 40; k += 9) {
21162 GemmMicrokernelTester()
21163 .mr(1)
21164 .nr(4)
21165 .kr(2)
21166 .sr(1)
21167 .m(1)
21168 .n(4)
21169 .k(k)
21170 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080021171 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021172 }
21173 }
21174
21175 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, no_zero_point) {
21176 for (size_t k = 1; k <= 40; k += 9) {
21177 GemmMicrokernelTester()
21178 .mr(1)
21179 .nr(4)
21180 .kr(2)
21181 .sr(1)
21182 .m(1)
21183 .n(4)
21184 .k(k)
21185 .a_zero_point(0)
21186 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080021187 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021188 }
21189 }
Marat Dukhan4c617792021-12-21 15:47:58 -080021190#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021191
21192
Marat Dukhan4c617792021-12-21 15:47:58 -080021193#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021194 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
21195 GemmMicrokernelTester()
21196 .mr(3)
21197 .nr(4)
21198 .kr(2)
21199 .sr(1)
21200 .m(3)
21201 .n(4)
21202 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080021203 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021204 }
21205
21206 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
21207 GemmMicrokernelTester()
21208 .mr(3)
21209 .nr(4)
21210 .kr(2)
21211 .sr(1)
21212 .m(3)
21213 .n(4)
21214 .k(8)
21215 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021216 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021217 }
21218
21219 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021220 for (uint32_t n = 1; n <= 4; n++) {
21221 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021222 GemmMicrokernelTester()
21223 .mr(3)
21224 .nr(4)
21225 .kr(2)
21226 .sr(1)
21227 .m(m)
21228 .n(n)
21229 .k(8)
21230 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021231 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021232 }
21233 }
21234 }
21235
21236 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
21237 for (uint32_t m = 1; m <= 3; m++) {
21238 GemmMicrokernelTester()
21239 .mr(3)
21240 .nr(4)
21241 .kr(2)
21242 .sr(1)
21243 .m(m)
21244 .n(4)
21245 .k(8)
21246 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021247 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021248 }
21249 }
21250
21251 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
21252 for (uint32_t n = 1; n <= 4; n++) {
21253 GemmMicrokernelTester()
21254 .mr(3)
21255 .nr(4)
21256 .kr(2)
21257 .sr(1)
21258 .m(3)
21259 .n(n)
21260 .k(8)
21261 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021262 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021263 }
21264 }
21265
21266 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
21267 for (size_t k = 1; k < 8; k++) {
21268 GemmMicrokernelTester()
21269 .mr(3)
21270 .nr(4)
21271 .kr(2)
21272 .sr(1)
21273 .m(3)
21274 .n(4)
21275 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021276 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021277 }
21278 }
21279
21280 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
21281 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021282 for (uint32_t n = 1; n <= 4; n++) {
21283 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021284 GemmMicrokernelTester()
21285 .mr(3)
21286 .nr(4)
21287 .kr(2)
21288 .sr(1)
21289 .m(m)
21290 .n(n)
21291 .k(k)
21292 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021293 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021294 }
21295 }
21296 }
21297 }
21298
21299 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
21300 for (size_t k = 9; k < 16; k++) {
21301 GemmMicrokernelTester()
21302 .mr(3)
21303 .nr(4)
21304 .kr(2)
21305 .sr(1)
21306 .m(3)
21307 .n(4)
21308 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021309 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021310 }
21311 }
21312
21313 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
21314 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021315 for (uint32_t n = 1; n <= 4; n++) {
21316 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021317 GemmMicrokernelTester()
21318 .mr(3)
21319 .nr(4)
21320 .kr(2)
21321 .sr(1)
21322 .m(m)
21323 .n(n)
21324 .k(k)
21325 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021326 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021327 }
21328 }
21329 }
21330 }
21331
21332 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
21333 for (size_t k = 16; k <= 80; k += 8) {
21334 GemmMicrokernelTester()
21335 .mr(3)
21336 .nr(4)
21337 .kr(2)
21338 .sr(1)
21339 .m(3)
21340 .n(4)
21341 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021342 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021343 }
21344 }
21345
21346 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
21347 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021348 for (uint32_t n = 1; n <= 4; n++) {
21349 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021350 GemmMicrokernelTester()
21351 .mr(3)
21352 .nr(4)
21353 .kr(2)
21354 .sr(1)
21355 .m(m)
21356 .n(n)
21357 .k(k)
21358 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021359 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021360 }
21361 }
21362 }
21363 }
21364
21365 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
21366 for (uint32_t n = 5; n < 8; n++) {
21367 for (size_t k = 1; k <= 40; k += 9) {
21368 GemmMicrokernelTester()
21369 .mr(3)
21370 .nr(4)
21371 .kr(2)
21372 .sr(1)
21373 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021374 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021375 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021376 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021377 }
21378 }
21379 }
21380
21381 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
21382 for (uint32_t n = 5; n < 8; n++) {
21383 for (size_t k = 1; k <= 40; k += 9) {
21384 GemmMicrokernelTester()
21385 .mr(3)
21386 .nr(4)
21387 .kr(2)
21388 .sr(1)
21389 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021390 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021391 .k(k)
21392 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021393 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021394 }
21395 }
21396 }
21397
21398 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
21399 for (uint32_t n = 5; n < 8; n++) {
21400 for (size_t k = 1; k <= 40; k += 9) {
21401 for (uint32_t m = 1; m <= 3; m++) {
21402 GemmMicrokernelTester()
21403 .mr(3)
21404 .nr(4)
21405 .kr(2)
21406 .sr(1)
21407 .m(m)
21408 .n(n)
21409 .k(k)
21410 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021411 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021412 }
21413 }
21414 }
21415 }
21416
21417 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
21418 for (uint32_t n = 8; n <= 12; n += 4) {
21419 for (size_t k = 1; k <= 40; k += 9) {
21420 GemmMicrokernelTester()
21421 .mr(3)
21422 .nr(4)
21423 .kr(2)
21424 .sr(1)
21425 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021426 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021427 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021428 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021429 }
21430 }
21431 }
21432
21433 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
21434 for (uint32_t n = 8; n <= 12; n += 4) {
21435 for (size_t k = 1; k <= 40; k += 9) {
21436 GemmMicrokernelTester()
21437 .mr(3)
21438 .nr(4)
21439 .kr(2)
21440 .sr(1)
21441 .m(3)
21442 .n(n)
21443 .k(k)
21444 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021445 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021446 }
21447 }
21448 }
21449
21450 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
21451 for (uint32_t n = 8; n <= 12; n += 4) {
21452 for (size_t k = 1; k <= 40; k += 9) {
21453 for (uint32_t m = 1; m <= 3; m++) {
21454 GemmMicrokernelTester()
21455 .mr(3)
21456 .nr(4)
21457 .kr(2)
21458 .sr(1)
21459 .m(m)
21460 .n(n)
21461 .k(k)
21462 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021463 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021464 }
21465 }
21466 }
21467 }
21468
21469 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, small_kernel) {
21470 for (size_t k = 1; k <= 40; k += 9) {
21471 GemmMicrokernelTester()
21472 .mr(3)
21473 .nr(4)
21474 .kr(2)
21475 .sr(1)
21476 .m(3)
21477 .n(4)
21478 .k(k)
21479 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080021480 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021481 }
21482 }
21483
21484 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
21485 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021486 for (uint32_t n = 1; n <= 4; n++) {
21487 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021488 GemmMicrokernelTester()
21489 .mr(3)
21490 .nr(4)
21491 .kr(2)
21492 .sr(1)
21493 .m(m)
21494 .n(n)
21495 .k(k)
21496 .ks(3)
21497 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021498 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021499 }
21500 }
21501 }
21502 }
21503
21504 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
21505 for (uint32_t n = 5; n < 8; n++) {
21506 for (size_t k = 1; k <= 40; k += 9) {
21507 GemmMicrokernelTester()
21508 .mr(3)
21509 .nr(4)
21510 .kr(2)
21511 .sr(1)
21512 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021513 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021514 .k(k)
21515 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080021516 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021517 }
21518 }
21519 }
21520
21521 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
21522 for (uint32_t n = 8; n <= 12; n += 4) {
21523 for (size_t k = 1; k <= 40; k += 9) {
21524 GemmMicrokernelTester()
21525 .mr(3)
21526 .nr(4)
21527 .kr(2)
21528 .sr(1)
21529 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021530 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021531 .k(k)
21532 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080021533 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021534 }
21535 }
21536 }
21537
21538 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
21539 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021540 for (uint32_t n = 1; n <= 4; n++) {
21541 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021542 GemmMicrokernelTester()
21543 .mr(3)
21544 .nr(4)
21545 .kr(2)
21546 .sr(1)
21547 .m(m)
21548 .n(n)
21549 .k(k)
21550 .cm_stride(7)
21551 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021552 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021553 }
21554 }
21555 }
21556 }
21557
21558 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, a_offset) {
21559 for (size_t k = 1; k <= 40; k += 9) {
21560 GemmMicrokernelTester()
21561 .mr(3)
21562 .nr(4)
21563 .kr(2)
21564 .sr(1)
21565 .m(3)
21566 .n(4)
21567 .k(k)
21568 .ks(3)
21569 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080021570 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021571 }
21572 }
21573
21574 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021575 for (size_t k = 1; k <= 40; k += 9) {
21576 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021577 GemmMicrokernelTester()
21578 .mr(3)
21579 .nr(4)
21580 .kr(2)
21581 .sr(1)
21582 .m(3)
21583 .n(4)
21584 .k(k)
21585 .ks(3)
21586 .a_offset(127)
21587 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080021588 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021589 }
21590 }
21591 }
21592
21593 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
21594 GemmMicrokernelTester()
21595 .mr(3)
21596 .nr(4)
21597 .kr(2)
21598 .sr(1)
21599 .m(3)
21600 .n(4)
21601 .k(8)
21602 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021603 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021604 }
21605
21606 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
21607 GemmMicrokernelTester()
21608 .mr(3)
21609 .nr(4)
21610 .kr(2)
21611 .sr(1)
21612 .m(3)
21613 .n(4)
21614 .k(8)
21615 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021616 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021617 }
21618
21619 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
21620 GemmMicrokernelTester()
21621 .mr(3)
21622 .nr(4)
21623 .kr(2)
21624 .sr(1)
21625 .m(3)
21626 .n(4)
21627 .k(8)
21628 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021629 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021630 }
21631
21632 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
21633 for (size_t k = 1; k <= 40; k += 9) {
21634 GemmMicrokernelTester()
21635 .mr(3)
21636 .nr(4)
21637 .kr(2)
21638 .sr(1)
21639 .m(3)
21640 .n(4)
21641 .k(k)
21642 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080021643 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021644 }
21645 }
21646
21647 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
21648 for (size_t k = 1; k <= 40; k += 9) {
21649 GemmMicrokernelTester()
21650 .mr(3)
21651 .nr(4)
21652 .kr(2)
21653 .sr(1)
21654 .m(3)
21655 .n(4)
21656 .k(k)
21657 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080021658 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021659 }
21660 }
21661
21662 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, no_zero_point) {
21663 for (size_t k = 1; k <= 40; k += 9) {
21664 GemmMicrokernelTester()
21665 .mr(3)
21666 .nr(4)
21667 .kr(2)
21668 .sr(1)
21669 .m(3)
21670 .n(4)
21671 .k(k)
21672 .a_zero_point(0)
21673 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080021674 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021675 }
21676 }
Marat Dukhan4c617792021-12-21 15:47:58 -080021677#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021678
21679
Marat Dukhan4c617792021-12-21 15:47:58 -080021680#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021681 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
21682 GemmMicrokernelTester()
21683 .mr(4)
21684 .nr(4)
21685 .kr(2)
21686 .sr(1)
21687 .m(4)
21688 .n(4)
21689 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080021690 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021691 }
21692
21693 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
21694 GemmMicrokernelTester()
21695 .mr(4)
21696 .nr(4)
21697 .kr(2)
21698 .sr(1)
21699 .m(4)
21700 .n(4)
21701 .k(8)
21702 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021703 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021704 }
21705
21706 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021707 for (uint32_t n = 1; n <= 4; n++) {
21708 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021709 GemmMicrokernelTester()
21710 .mr(4)
21711 .nr(4)
21712 .kr(2)
21713 .sr(1)
21714 .m(m)
21715 .n(n)
21716 .k(8)
21717 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021718 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021719 }
21720 }
21721 }
21722
21723 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
21724 for (uint32_t m = 1; m <= 4; m++) {
21725 GemmMicrokernelTester()
21726 .mr(4)
21727 .nr(4)
21728 .kr(2)
21729 .sr(1)
21730 .m(m)
21731 .n(4)
21732 .k(8)
21733 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021734 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021735 }
21736 }
21737
21738 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
21739 for (uint32_t n = 1; n <= 4; n++) {
21740 GemmMicrokernelTester()
21741 .mr(4)
21742 .nr(4)
21743 .kr(2)
21744 .sr(1)
21745 .m(4)
21746 .n(n)
21747 .k(8)
21748 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021749 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021750 }
21751 }
21752
21753 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
21754 for (size_t k = 1; k < 8; k++) {
21755 GemmMicrokernelTester()
21756 .mr(4)
21757 .nr(4)
21758 .kr(2)
21759 .sr(1)
21760 .m(4)
21761 .n(4)
21762 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021763 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021764 }
21765 }
21766
21767 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
21768 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021769 for (uint32_t n = 1; n <= 4; n++) {
21770 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021771 GemmMicrokernelTester()
21772 .mr(4)
21773 .nr(4)
21774 .kr(2)
21775 .sr(1)
21776 .m(m)
21777 .n(n)
21778 .k(k)
21779 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021780 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021781 }
21782 }
21783 }
21784 }
21785
21786 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
21787 for (size_t k = 9; k < 16; k++) {
21788 GemmMicrokernelTester()
21789 .mr(4)
21790 .nr(4)
21791 .kr(2)
21792 .sr(1)
21793 .m(4)
21794 .n(4)
21795 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021796 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021797 }
21798 }
21799
21800 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
21801 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021802 for (uint32_t n = 1; n <= 4; n++) {
21803 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021804 GemmMicrokernelTester()
21805 .mr(4)
21806 .nr(4)
21807 .kr(2)
21808 .sr(1)
21809 .m(m)
21810 .n(n)
21811 .k(k)
21812 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021813 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021814 }
21815 }
21816 }
21817 }
21818
21819 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
21820 for (size_t k = 16; k <= 80; k += 8) {
21821 GemmMicrokernelTester()
21822 .mr(4)
21823 .nr(4)
21824 .kr(2)
21825 .sr(1)
21826 .m(4)
21827 .n(4)
21828 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021829 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021830 }
21831 }
21832
21833 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
21834 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021835 for (uint32_t n = 1; n <= 4; n++) {
21836 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021837 GemmMicrokernelTester()
21838 .mr(4)
21839 .nr(4)
21840 .kr(2)
21841 .sr(1)
21842 .m(m)
21843 .n(n)
21844 .k(k)
21845 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021846 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021847 }
21848 }
21849 }
21850 }
21851
21852 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
21853 for (uint32_t n = 5; n < 8; n++) {
21854 for (size_t k = 1; k <= 40; k += 9) {
21855 GemmMicrokernelTester()
21856 .mr(4)
21857 .nr(4)
21858 .kr(2)
21859 .sr(1)
21860 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021861 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021862 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021863 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021864 }
21865 }
21866 }
21867
21868 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
21869 for (uint32_t n = 5; n < 8; n++) {
21870 for (size_t k = 1; k <= 40; k += 9) {
21871 GemmMicrokernelTester()
21872 .mr(4)
21873 .nr(4)
21874 .kr(2)
21875 .sr(1)
21876 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021877 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021878 .k(k)
21879 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021880 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021881 }
21882 }
21883 }
21884
21885 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
21886 for (uint32_t n = 5; n < 8; n++) {
21887 for (size_t k = 1; k <= 40; k += 9) {
21888 for (uint32_t m = 1; m <= 4; m++) {
21889 GemmMicrokernelTester()
21890 .mr(4)
21891 .nr(4)
21892 .kr(2)
21893 .sr(1)
21894 .m(m)
21895 .n(n)
21896 .k(k)
21897 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021898 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021899 }
21900 }
21901 }
21902 }
21903
21904 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
21905 for (uint32_t n = 8; n <= 12; n += 4) {
21906 for (size_t k = 1; k <= 40; k += 9) {
21907 GemmMicrokernelTester()
21908 .mr(4)
21909 .nr(4)
21910 .kr(2)
21911 .sr(1)
21912 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021913 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021914 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021915 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021916 }
21917 }
21918 }
21919
21920 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
21921 for (uint32_t n = 8; n <= 12; n += 4) {
21922 for (size_t k = 1; k <= 40; k += 9) {
21923 GemmMicrokernelTester()
21924 .mr(4)
21925 .nr(4)
21926 .kr(2)
21927 .sr(1)
21928 .m(4)
21929 .n(n)
21930 .k(k)
21931 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080021932 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021933 }
21934 }
21935 }
21936
21937 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
21938 for (uint32_t n = 8; n <= 12; n += 4) {
21939 for (size_t k = 1; k <= 40; k += 9) {
21940 for (uint32_t m = 1; m <= 4; m++) {
21941 GemmMicrokernelTester()
21942 .mr(4)
21943 .nr(4)
21944 .kr(2)
21945 .sr(1)
21946 .m(m)
21947 .n(n)
21948 .k(k)
21949 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021950 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021951 }
21952 }
21953 }
21954 }
21955
21956 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, small_kernel) {
21957 for (size_t k = 1; k <= 40; k += 9) {
21958 GemmMicrokernelTester()
21959 .mr(4)
21960 .nr(4)
21961 .kr(2)
21962 .sr(1)
21963 .m(4)
21964 .n(4)
21965 .k(k)
21966 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080021967 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021968 }
21969 }
21970
21971 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
21972 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021973 for (uint32_t n = 1; n <= 4; n++) {
21974 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021975 GemmMicrokernelTester()
21976 .mr(4)
21977 .nr(4)
21978 .kr(2)
21979 .sr(1)
21980 .m(m)
21981 .n(n)
21982 .k(k)
21983 .ks(3)
21984 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021985 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070021986 }
21987 }
21988 }
21989 }
21990
21991 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
21992 for (uint32_t n = 5; n < 8; n++) {
21993 for (size_t k = 1; k <= 40; k += 9) {
21994 GemmMicrokernelTester()
21995 .mr(4)
21996 .nr(4)
21997 .kr(2)
21998 .sr(1)
21999 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022000 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070022001 .k(k)
22002 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080022003 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070022004 }
22005 }
22006 }
22007
22008 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
22009 for (uint32_t n = 8; n <= 12; n += 4) {
22010 for (size_t k = 1; k <= 40; k += 9) {
22011 GemmMicrokernelTester()
22012 .mr(4)
22013 .nr(4)
22014 .kr(2)
22015 .sr(1)
22016 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080022017 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070022018 .k(k)
22019 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080022020 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070022021 }
22022 }
22023 }
22024
22025 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
22026 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022027 for (uint32_t n = 1; n <= 4; n++) {
22028 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070022029 GemmMicrokernelTester()
22030 .mr(4)
22031 .nr(4)
22032 .kr(2)
22033 .sr(1)
22034 .m(m)
22035 .n(n)
22036 .k(k)
22037 .cm_stride(7)
22038 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022039 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070022040 }
22041 }
22042 }
22043 }
22044
22045 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, a_offset) {
22046 for (size_t k = 1; k <= 40; k += 9) {
22047 GemmMicrokernelTester()
22048 .mr(4)
22049 .nr(4)
22050 .kr(2)
22051 .sr(1)
22052 .m(4)
22053 .n(4)
22054 .k(k)
22055 .ks(3)
22056 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080022057 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070022058 }
22059 }
22060
22061 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080022062 for (size_t k = 1; k <= 40; k += 9) {
22063 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070022064 GemmMicrokernelTester()
22065 .mr(4)
22066 .nr(4)
22067 .kr(2)
22068 .sr(1)
22069 .m(4)
22070 .n(4)
22071 .k(k)
22072 .ks(3)
22073 .a_offset(163)
22074 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080022075 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070022076 }
22077 }
22078 }
22079
22080 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
22081 GemmMicrokernelTester()
22082 .mr(4)
22083 .nr(4)
22084 .kr(2)
22085 .sr(1)
22086 .m(4)
22087 .n(4)
22088 .k(8)
22089 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022090 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070022091 }
22092
22093 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
22094 GemmMicrokernelTester()
22095 .mr(4)
22096 .nr(4)
22097 .kr(2)
22098 .sr(1)
22099 .m(4)
22100 .n(4)
22101 .k(8)
22102 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080022103 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070022104 }
22105
22106 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
22107 GemmMicrokernelTester()
22108 .mr(4)
22109 .nr(4)
22110 .kr(2)
22111 .sr(1)
22112 .m(4)
22113 .n(4)
22114 .k(8)
22115 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080022116 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070022117 }
22118
22119 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
22120 for (size_t k = 1; k <= 40; k += 9) {
22121 GemmMicrokernelTester()
22122 .mr(4)
22123 .nr(4)
22124 .kr(2)
22125 .sr(1)
22126 .m(4)
22127 .n(4)
22128 .k(k)
22129 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080022130 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070022131 }
22132 }
22133
22134 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
22135 for (size_t k = 1; k <= 40; k += 9) {
22136 GemmMicrokernelTester()
22137 .mr(4)
22138 .nr(4)
22139 .kr(2)
22140 .sr(1)
22141 .m(4)
22142 .n(4)
22143 .k(k)
22144 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080022145 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070022146 }
22147 }
22148
22149 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, no_zero_point) {
22150 for (size_t k = 1; k <= 40; k += 9) {
22151 GemmMicrokernelTester()
22152 .mr(4)
22153 .nr(4)
22154 .kr(2)
22155 .sr(1)
22156 .m(4)
22157 .n(4)
22158 .k(k)
22159 .a_zero_point(0)
22160 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080022161 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070022162 }
22163 }
Marat Dukhan4c617792021-12-21 15:47:58 -080022164#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070022165
22166
Marat Dukhan4c617792021-12-21 15:47:58 -080022167#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan348c3772022-02-01 00:36:50 -080022168 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8) {
22169 GemmMicrokernelTester()
22170 .mr(1)
22171 .nr(4)
22172 .kr(2)
22173 .sr(4)
22174 .m(1)
22175 .n(4)
22176 .k(8)
22177 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22178 }
22179
22180 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cn) {
22181 GemmMicrokernelTester()
22182 .mr(1)
22183 .nr(4)
22184 .kr(2)
22185 .sr(4)
22186 .m(1)
22187 .n(4)
22188 .k(8)
22189 .cn_stride(7)
22190 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22191 }
22192
22193 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
22194 for (uint32_t n = 1; n <= 4; n++) {
22195 for (uint32_t m = 1; m <= 1; m++) {
22196 GemmMicrokernelTester()
22197 .mr(1)
22198 .nr(4)
22199 .kr(2)
22200 .sr(4)
22201 .m(m)
22202 .n(n)
22203 .k(8)
22204 .iterations(1)
22205 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22206 }
22207 }
22208 }
22209
22210 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
22211 for (uint32_t m = 1; m <= 1; m++) {
22212 GemmMicrokernelTester()
22213 .mr(1)
22214 .nr(4)
22215 .kr(2)
22216 .sr(4)
22217 .m(m)
22218 .n(4)
22219 .k(8)
22220 .iterations(1)
22221 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22222 }
22223 }
22224
22225 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
22226 for (uint32_t n = 1; n <= 4; n++) {
22227 GemmMicrokernelTester()
22228 .mr(1)
22229 .nr(4)
22230 .kr(2)
22231 .sr(4)
22232 .m(1)
22233 .n(n)
22234 .k(8)
22235 .iterations(1)
22236 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22237 }
22238 }
22239
22240 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8) {
22241 for (size_t k = 1; k < 8; k++) {
22242 GemmMicrokernelTester()
22243 .mr(1)
22244 .nr(4)
22245 .kr(2)
22246 .sr(4)
22247 .m(1)
22248 .n(4)
22249 .k(k)
22250 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22251 }
22252 }
22253
22254 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
22255 for (size_t k = 1; k < 8; k++) {
22256 for (uint32_t n = 1; n <= 4; n++) {
22257 for (uint32_t m = 1; m <= 1; m++) {
22258 GemmMicrokernelTester()
22259 .mr(1)
22260 .nr(4)
22261 .kr(2)
22262 .sr(4)
22263 .m(m)
22264 .n(n)
22265 .k(k)
22266 .iterations(1)
22267 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22268 }
22269 }
22270 }
22271 }
22272
22273 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8) {
22274 for (size_t k = 9; k < 16; k++) {
22275 GemmMicrokernelTester()
22276 .mr(1)
22277 .nr(4)
22278 .kr(2)
22279 .sr(4)
22280 .m(1)
22281 .n(4)
22282 .k(k)
22283 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22284 }
22285 }
22286
22287 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
22288 for (size_t k = 9; k < 16; k++) {
22289 for (uint32_t n = 1; n <= 4; n++) {
22290 for (uint32_t m = 1; m <= 1; m++) {
22291 GemmMicrokernelTester()
22292 .mr(1)
22293 .nr(4)
22294 .kr(2)
22295 .sr(4)
22296 .m(m)
22297 .n(n)
22298 .k(k)
22299 .iterations(1)
22300 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22301 }
22302 }
22303 }
22304 }
22305
22306 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8) {
22307 for (size_t k = 16; k <= 80; k += 8) {
22308 GemmMicrokernelTester()
22309 .mr(1)
22310 .nr(4)
22311 .kr(2)
22312 .sr(4)
22313 .m(1)
22314 .n(4)
22315 .k(k)
22316 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22317 }
22318 }
22319
22320 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
22321 for (size_t k = 16; k <= 80; k += 8) {
22322 for (uint32_t n = 1; n <= 4; n++) {
22323 for (uint32_t m = 1; m <= 1; m++) {
22324 GemmMicrokernelTester()
22325 .mr(1)
22326 .nr(4)
22327 .kr(2)
22328 .sr(4)
22329 .m(m)
22330 .n(n)
22331 .k(k)
22332 .iterations(1)
22333 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22334 }
22335 }
22336 }
22337 }
22338
22339 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4) {
22340 for (uint32_t n = 5; n < 8; n++) {
22341 for (size_t k = 1; k <= 40; k += 9) {
22342 GemmMicrokernelTester()
22343 .mr(1)
22344 .nr(4)
22345 .kr(2)
22346 .sr(4)
22347 .m(1)
22348 .n(n)
22349 .k(k)
22350 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22351 }
22352 }
22353 }
22354
22355 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
22356 for (uint32_t n = 5; n < 8; n++) {
22357 for (size_t k = 1; k <= 40; k += 9) {
22358 GemmMicrokernelTester()
22359 .mr(1)
22360 .nr(4)
22361 .kr(2)
22362 .sr(4)
22363 .m(1)
22364 .n(n)
22365 .k(k)
22366 .cn_stride(7)
22367 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22368 }
22369 }
22370 }
22371
22372 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
22373 for (uint32_t n = 5; n < 8; n++) {
22374 for (size_t k = 1; k <= 40; k += 9) {
22375 for (uint32_t m = 1; m <= 1; m++) {
22376 GemmMicrokernelTester()
22377 .mr(1)
22378 .nr(4)
22379 .kr(2)
22380 .sr(4)
22381 .m(m)
22382 .n(n)
22383 .k(k)
22384 .iterations(1)
22385 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22386 }
22387 }
22388 }
22389 }
22390
22391 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4) {
22392 for (uint32_t n = 8; n <= 12; n += 4) {
22393 for (size_t k = 1; k <= 40; k += 9) {
22394 GemmMicrokernelTester()
22395 .mr(1)
22396 .nr(4)
22397 .kr(2)
22398 .sr(4)
22399 .m(1)
22400 .n(n)
22401 .k(k)
22402 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22403 }
22404 }
22405 }
22406
22407 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
22408 for (uint32_t n = 8; n <= 12; n += 4) {
22409 for (size_t k = 1; k <= 40; k += 9) {
22410 GemmMicrokernelTester()
22411 .mr(1)
22412 .nr(4)
22413 .kr(2)
22414 .sr(4)
22415 .m(1)
22416 .n(n)
22417 .k(k)
22418 .cn_stride(7)
22419 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22420 }
22421 }
22422 }
22423
22424 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
22425 for (uint32_t n = 8; n <= 12; n += 4) {
22426 for (size_t k = 1; k <= 40; k += 9) {
22427 for (uint32_t m = 1; m <= 1; m++) {
22428 GemmMicrokernelTester()
22429 .mr(1)
22430 .nr(4)
22431 .kr(2)
22432 .sr(4)
22433 .m(m)
22434 .n(n)
22435 .k(k)
22436 .iterations(1)
22437 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22438 }
22439 }
22440 }
22441 }
22442
22443 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel) {
22444 for (size_t k = 1; k <= 40; k += 9) {
22445 GemmMicrokernelTester()
22446 .mr(1)
22447 .nr(4)
22448 .kr(2)
22449 .sr(4)
22450 .m(1)
22451 .n(4)
22452 .k(k)
22453 .ks(3)
22454 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22455 }
22456 }
22457
22458 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
22459 for (size_t k = 1; k <= 40; k += 9) {
22460 for (uint32_t n = 1; n <= 4; n++) {
22461 for (uint32_t m = 1; m <= 1; m++) {
22462 GemmMicrokernelTester()
22463 .mr(1)
22464 .nr(4)
22465 .kr(2)
22466 .sr(4)
22467 .m(m)
22468 .n(n)
22469 .k(k)
22470 .ks(3)
22471 .iterations(1)
22472 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22473 }
22474 }
22475 }
22476 }
22477
22478 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
22479 for (uint32_t n = 5; n < 8; n++) {
22480 for (size_t k = 1; k <= 40; k += 9) {
22481 GemmMicrokernelTester()
22482 .mr(1)
22483 .nr(4)
22484 .kr(2)
22485 .sr(4)
22486 .m(1)
22487 .n(n)
22488 .k(k)
22489 .ks(3)
22490 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22491 }
22492 }
22493 }
22494
22495 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
22496 for (uint32_t n = 8; n <= 12; n += 4) {
22497 for (size_t k = 1; k <= 40; k += 9) {
22498 GemmMicrokernelTester()
22499 .mr(1)
22500 .nr(4)
22501 .kr(2)
22502 .sr(4)
22503 .m(1)
22504 .n(n)
22505 .k(k)
22506 .ks(3)
22507 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22508 }
22509 }
22510 }
22511
22512 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
22513 for (size_t k = 1; k <= 40; k += 9) {
22514 for (uint32_t n = 1; n <= 4; n++) {
22515 for (uint32_t m = 1; m <= 1; m++) {
22516 GemmMicrokernelTester()
22517 .mr(1)
22518 .nr(4)
22519 .kr(2)
22520 .sr(4)
22521 .m(m)
22522 .n(n)
22523 .k(k)
22524 .cm_stride(7)
22525 .iterations(1)
22526 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22527 }
22528 }
22529 }
22530 }
22531
22532 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, a_offset) {
22533 for (size_t k = 1; k <= 40; k += 9) {
22534 GemmMicrokernelTester()
22535 .mr(1)
22536 .nr(4)
22537 .kr(2)
22538 .sr(4)
22539 .m(1)
22540 .n(4)
22541 .k(k)
22542 .ks(3)
22543 .a_offset(43)
22544 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22545 }
22546 }
22547
22548 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, zero) {
22549 for (size_t k = 1; k <= 40; k += 9) {
22550 for (uint32_t mz = 0; mz < 1; mz++) {
22551 GemmMicrokernelTester()
22552 .mr(1)
22553 .nr(4)
22554 .kr(2)
22555 .sr(4)
22556 .m(1)
22557 .n(4)
22558 .k(k)
22559 .ks(3)
22560 .a_offset(43)
22561 .zero_index(mz)
22562 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22563 }
22564 }
22565 }
22566
22567 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, qmin) {
22568 GemmMicrokernelTester()
22569 .mr(1)
22570 .nr(4)
22571 .kr(2)
22572 .sr(4)
22573 .m(1)
22574 .n(4)
22575 .k(8)
22576 .qmin(128)
22577 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22578 }
22579
22580 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, qmax) {
22581 GemmMicrokernelTester()
22582 .mr(1)
22583 .nr(4)
22584 .kr(2)
22585 .sr(4)
22586 .m(1)
22587 .n(4)
22588 .k(8)
22589 .qmax(128)
22590 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22591 }
22592
22593 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm) {
22594 GemmMicrokernelTester()
22595 .mr(1)
22596 .nr(4)
22597 .kr(2)
22598 .sr(4)
22599 .m(1)
22600 .n(4)
22601 .k(8)
22602 .cm_stride(7)
22603 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22604 }
22605
22606 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
22607 for (size_t k = 1; k <= 40; k += 9) {
22608 GemmMicrokernelTester()
22609 .mr(1)
22610 .nr(4)
22611 .kr(2)
22612 .sr(4)
22613 .m(1)
22614 .n(4)
22615 .k(k)
22616 .a_zero_point(0)
22617 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22618 }
22619 }
22620
22621 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
22622 for (size_t k = 1; k <= 40; k += 9) {
22623 GemmMicrokernelTester()
22624 .mr(1)
22625 .nr(4)
22626 .kr(2)
22627 .sr(4)
22628 .m(1)
22629 .n(4)
22630 .k(k)
22631 .b_zero_point(0)
22632 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22633 }
22634 }
22635
22636 TEST(QU8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, no_zero_point) {
22637 for (size_t k = 1; k <= 40; k += 9) {
22638 GemmMicrokernelTester()
22639 .mr(1)
22640 .nr(4)
22641 .kr(2)
22642 .sr(4)
22643 .m(1)
22644 .n(4)
22645 .k(k)
22646 .a_zero_point(0)
22647 .b_zero_point(0)
22648 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22649 }
22650 }
22651#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
22652
22653
22654#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
22655 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8) {
22656 GemmMicrokernelTester()
22657 .mr(2)
22658 .nr(4)
22659 .kr(2)
22660 .sr(4)
22661 .m(2)
22662 .n(4)
22663 .k(8)
22664 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22665 }
22666
22667 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cn) {
22668 GemmMicrokernelTester()
22669 .mr(2)
22670 .nr(4)
22671 .kr(2)
22672 .sr(4)
22673 .m(2)
22674 .n(4)
22675 .k(8)
22676 .cn_stride(7)
22677 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22678 }
22679
22680 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
22681 for (uint32_t n = 1; n <= 4; n++) {
22682 for (uint32_t m = 1; m <= 2; m++) {
22683 GemmMicrokernelTester()
22684 .mr(2)
22685 .nr(4)
22686 .kr(2)
22687 .sr(4)
22688 .m(m)
22689 .n(n)
22690 .k(8)
22691 .iterations(1)
22692 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22693 }
22694 }
22695 }
22696
22697 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
22698 for (uint32_t m = 1; m <= 2; m++) {
22699 GemmMicrokernelTester()
22700 .mr(2)
22701 .nr(4)
22702 .kr(2)
22703 .sr(4)
22704 .m(m)
22705 .n(4)
22706 .k(8)
22707 .iterations(1)
22708 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22709 }
22710 }
22711
22712 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
22713 for (uint32_t n = 1; n <= 4; n++) {
22714 GemmMicrokernelTester()
22715 .mr(2)
22716 .nr(4)
22717 .kr(2)
22718 .sr(4)
22719 .m(2)
22720 .n(n)
22721 .k(8)
22722 .iterations(1)
22723 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22724 }
22725 }
22726
22727 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8) {
22728 for (size_t k = 1; k < 8; k++) {
22729 GemmMicrokernelTester()
22730 .mr(2)
22731 .nr(4)
22732 .kr(2)
22733 .sr(4)
22734 .m(2)
22735 .n(4)
22736 .k(k)
22737 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22738 }
22739 }
22740
22741 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
22742 for (size_t k = 1; k < 8; k++) {
22743 for (uint32_t n = 1; n <= 4; n++) {
22744 for (uint32_t m = 1; m <= 2; m++) {
22745 GemmMicrokernelTester()
22746 .mr(2)
22747 .nr(4)
22748 .kr(2)
22749 .sr(4)
22750 .m(m)
22751 .n(n)
22752 .k(k)
22753 .iterations(1)
22754 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22755 }
22756 }
22757 }
22758 }
22759
22760 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8) {
22761 for (size_t k = 9; k < 16; k++) {
22762 GemmMicrokernelTester()
22763 .mr(2)
22764 .nr(4)
22765 .kr(2)
22766 .sr(4)
22767 .m(2)
22768 .n(4)
22769 .k(k)
22770 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22771 }
22772 }
22773
22774 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
22775 for (size_t k = 9; k < 16; k++) {
22776 for (uint32_t n = 1; n <= 4; n++) {
22777 for (uint32_t m = 1; m <= 2; m++) {
22778 GemmMicrokernelTester()
22779 .mr(2)
22780 .nr(4)
22781 .kr(2)
22782 .sr(4)
22783 .m(m)
22784 .n(n)
22785 .k(k)
22786 .iterations(1)
22787 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22788 }
22789 }
22790 }
22791 }
22792
22793 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8) {
22794 for (size_t k = 16; k <= 80; k += 8) {
22795 GemmMicrokernelTester()
22796 .mr(2)
22797 .nr(4)
22798 .kr(2)
22799 .sr(4)
22800 .m(2)
22801 .n(4)
22802 .k(k)
22803 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22804 }
22805 }
22806
22807 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
22808 for (size_t k = 16; k <= 80; k += 8) {
22809 for (uint32_t n = 1; n <= 4; n++) {
22810 for (uint32_t m = 1; m <= 2; m++) {
22811 GemmMicrokernelTester()
22812 .mr(2)
22813 .nr(4)
22814 .kr(2)
22815 .sr(4)
22816 .m(m)
22817 .n(n)
22818 .k(k)
22819 .iterations(1)
22820 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22821 }
22822 }
22823 }
22824 }
22825
22826 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4) {
22827 for (uint32_t n = 5; n < 8; n++) {
22828 for (size_t k = 1; k <= 40; k += 9) {
22829 GemmMicrokernelTester()
22830 .mr(2)
22831 .nr(4)
22832 .kr(2)
22833 .sr(4)
22834 .m(2)
22835 .n(n)
22836 .k(k)
22837 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22838 }
22839 }
22840 }
22841
22842 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
22843 for (uint32_t n = 5; n < 8; n++) {
22844 for (size_t k = 1; k <= 40; k += 9) {
22845 GemmMicrokernelTester()
22846 .mr(2)
22847 .nr(4)
22848 .kr(2)
22849 .sr(4)
22850 .m(2)
22851 .n(n)
22852 .k(k)
22853 .cn_stride(7)
22854 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22855 }
22856 }
22857 }
22858
22859 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
22860 for (uint32_t n = 5; n < 8; n++) {
22861 for (size_t k = 1; k <= 40; k += 9) {
22862 for (uint32_t m = 1; m <= 2; m++) {
22863 GemmMicrokernelTester()
22864 .mr(2)
22865 .nr(4)
22866 .kr(2)
22867 .sr(4)
22868 .m(m)
22869 .n(n)
22870 .k(k)
22871 .iterations(1)
22872 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22873 }
22874 }
22875 }
22876 }
22877
22878 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4) {
22879 for (uint32_t n = 8; n <= 12; n += 4) {
22880 for (size_t k = 1; k <= 40; k += 9) {
22881 GemmMicrokernelTester()
22882 .mr(2)
22883 .nr(4)
22884 .kr(2)
22885 .sr(4)
22886 .m(2)
22887 .n(n)
22888 .k(k)
22889 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22890 }
22891 }
22892 }
22893
22894 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
22895 for (uint32_t n = 8; n <= 12; n += 4) {
22896 for (size_t k = 1; k <= 40; k += 9) {
22897 GemmMicrokernelTester()
22898 .mr(2)
22899 .nr(4)
22900 .kr(2)
22901 .sr(4)
22902 .m(2)
22903 .n(n)
22904 .k(k)
22905 .cn_stride(7)
22906 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22907 }
22908 }
22909 }
22910
22911 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
22912 for (uint32_t n = 8; n <= 12; n += 4) {
22913 for (size_t k = 1; k <= 40; k += 9) {
22914 for (uint32_t m = 1; m <= 2; m++) {
22915 GemmMicrokernelTester()
22916 .mr(2)
22917 .nr(4)
22918 .kr(2)
22919 .sr(4)
22920 .m(m)
22921 .n(n)
22922 .k(k)
22923 .iterations(1)
22924 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22925 }
22926 }
22927 }
22928 }
22929
22930 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel) {
22931 for (size_t k = 1; k <= 40; k += 9) {
22932 GemmMicrokernelTester()
22933 .mr(2)
22934 .nr(4)
22935 .kr(2)
22936 .sr(4)
22937 .m(2)
22938 .n(4)
22939 .k(k)
22940 .ks(3)
22941 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22942 }
22943 }
22944
22945 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
22946 for (size_t k = 1; k <= 40; k += 9) {
22947 for (uint32_t n = 1; n <= 4; n++) {
22948 for (uint32_t m = 1; m <= 2; m++) {
22949 GemmMicrokernelTester()
22950 .mr(2)
22951 .nr(4)
22952 .kr(2)
22953 .sr(4)
22954 .m(m)
22955 .n(n)
22956 .k(k)
22957 .ks(3)
22958 .iterations(1)
22959 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22960 }
22961 }
22962 }
22963 }
22964
22965 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
22966 for (uint32_t n = 5; n < 8; n++) {
22967 for (size_t k = 1; k <= 40; k += 9) {
22968 GemmMicrokernelTester()
22969 .mr(2)
22970 .nr(4)
22971 .kr(2)
22972 .sr(4)
22973 .m(2)
22974 .n(n)
22975 .k(k)
22976 .ks(3)
22977 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22978 }
22979 }
22980 }
22981
22982 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
22983 for (uint32_t n = 8; n <= 12; n += 4) {
22984 for (size_t k = 1; k <= 40; k += 9) {
22985 GemmMicrokernelTester()
22986 .mr(2)
22987 .nr(4)
22988 .kr(2)
22989 .sr(4)
22990 .m(2)
22991 .n(n)
22992 .k(k)
22993 .ks(3)
22994 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
22995 }
22996 }
22997 }
22998
22999 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
23000 for (size_t k = 1; k <= 40; k += 9) {
23001 for (uint32_t n = 1; n <= 4; n++) {
23002 for (uint32_t m = 1; m <= 2; m++) {
23003 GemmMicrokernelTester()
23004 .mr(2)
23005 .nr(4)
23006 .kr(2)
23007 .sr(4)
23008 .m(m)
23009 .n(n)
23010 .k(k)
23011 .cm_stride(7)
23012 .iterations(1)
23013 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23014 }
23015 }
23016 }
23017 }
23018
23019 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, a_offset) {
23020 for (size_t k = 1; k <= 40; k += 9) {
23021 GemmMicrokernelTester()
23022 .mr(2)
23023 .nr(4)
23024 .kr(2)
23025 .sr(4)
23026 .m(2)
23027 .n(4)
23028 .k(k)
23029 .ks(3)
23030 .a_offset(83)
23031 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23032 }
23033 }
23034
23035 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, zero) {
23036 for (size_t k = 1; k <= 40; k += 9) {
23037 for (uint32_t mz = 0; mz < 2; mz++) {
23038 GemmMicrokernelTester()
23039 .mr(2)
23040 .nr(4)
23041 .kr(2)
23042 .sr(4)
23043 .m(2)
23044 .n(4)
23045 .k(k)
23046 .ks(3)
23047 .a_offset(83)
23048 .zero_index(mz)
23049 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23050 }
23051 }
23052 }
23053
23054 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, qmin) {
23055 GemmMicrokernelTester()
23056 .mr(2)
23057 .nr(4)
23058 .kr(2)
23059 .sr(4)
23060 .m(2)
23061 .n(4)
23062 .k(8)
23063 .qmin(128)
23064 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23065 }
23066
23067 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, qmax) {
23068 GemmMicrokernelTester()
23069 .mr(2)
23070 .nr(4)
23071 .kr(2)
23072 .sr(4)
23073 .m(2)
23074 .n(4)
23075 .k(8)
23076 .qmax(128)
23077 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23078 }
23079
23080 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm) {
23081 GemmMicrokernelTester()
23082 .mr(2)
23083 .nr(4)
23084 .kr(2)
23085 .sr(4)
23086 .m(2)
23087 .n(4)
23088 .k(8)
23089 .cm_stride(7)
23090 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23091 }
23092
23093 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
23094 for (size_t k = 1; k <= 40; k += 9) {
23095 GemmMicrokernelTester()
23096 .mr(2)
23097 .nr(4)
23098 .kr(2)
23099 .sr(4)
23100 .m(2)
23101 .n(4)
23102 .k(k)
23103 .a_zero_point(0)
23104 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23105 }
23106 }
23107
23108 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
23109 for (size_t k = 1; k <= 40; k += 9) {
23110 GemmMicrokernelTester()
23111 .mr(2)
23112 .nr(4)
23113 .kr(2)
23114 .sr(4)
23115 .m(2)
23116 .n(4)
23117 .k(k)
23118 .b_zero_point(0)
23119 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23120 }
23121 }
23122
23123 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, no_zero_point) {
23124 for (size_t k = 1; k <= 40; k += 9) {
23125 GemmMicrokernelTester()
23126 .mr(2)
23127 .nr(4)
23128 .kr(2)
23129 .sr(4)
23130 .m(2)
23131 .n(4)
23132 .k(k)
23133 .a_zero_point(0)
23134 .b_zero_point(0)
23135 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23136 }
23137 }
23138#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
23139
23140
23141#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
23142 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8) {
23143 GemmMicrokernelTester()
23144 .mr(3)
23145 .nr(4)
23146 .kr(2)
23147 .sr(4)
23148 .m(3)
23149 .n(4)
23150 .k(8)
23151 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23152 }
23153
23154 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cn) {
23155 GemmMicrokernelTester()
23156 .mr(3)
23157 .nr(4)
23158 .kr(2)
23159 .sr(4)
23160 .m(3)
23161 .n(4)
23162 .k(8)
23163 .cn_stride(7)
23164 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23165 }
23166
23167 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
23168 for (uint32_t n = 1; n <= 4; n++) {
23169 for (uint32_t m = 1; m <= 3; m++) {
23170 GemmMicrokernelTester()
23171 .mr(3)
23172 .nr(4)
23173 .kr(2)
23174 .sr(4)
23175 .m(m)
23176 .n(n)
23177 .k(8)
23178 .iterations(1)
23179 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23180 }
23181 }
23182 }
23183
23184 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
23185 for (uint32_t m = 1; m <= 3; m++) {
23186 GemmMicrokernelTester()
23187 .mr(3)
23188 .nr(4)
23189 .kr(2)
23190 .sr(4)
23191 .m(m)
23192 .n(4)
23193 .k(8)
23194 .iterations(1)
23195 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23196 }
23197 }
23198
23199 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
23200 for (uint32_t n = 1; n <= 4; n++) {
23201 GemmMicrokernelTester()
23202 .mr(3)
23203 .nr(4)
23204 .kr(2)
23205 .sr(4)
23206 .m(3)
23207 .n(n)
23208 .k(8)
23209 .iterations(1)
23210 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23211 }
23212 }
23213
23214 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8) {
23215 for (size_t k = 1; k < 8; k++) {
23216 GemmMicrokernelTester()
23217 .mr(3)
23218 .nr(4)
23219 .kr(2)
23220 .sr(4)
23221 .m(3)
23222 .n(4)
23223 .k(k)
23224 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23225 }
23226 }
23227
23228 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
23229 for (size_t k = 1; k < 8; k++) {
23230 for (uint32_t n = 1; n <= 4; n++) {
23231 for (uint32_t m = 1; m <= 3; m++) {
23232 GemmMicrokernelTester()
23233 .mr(3)
23234 .nr(4)
23235 .kr(2)
23236 .sr(4)
23237 .m(m)
23238 .n(n)
23239 .k(k)
23240 .iterations(1)
23241 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23242 }
23243 }
23244 }
23245 }
23246
23247 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8) {
23248 for (size_t k = 9; k < 16; k++) {
23249 GemmMicrokernelTester()
23250 .mr(3)
23251 .nr(4)
23252 .kr(2)
23253 .sr(4)
23254 .m(3)
23255 .n(4)
23256 .k(k)
23257 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23258 }
23259 }
23260
23261 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
23262 for (size_t k = 9; k < 16; k++) {
23263 for (uint32_t n = 1; n <= 4; n++) {
23264 for (uint32_t m = 1; m <= 3; m++) {
23265 GemmMicrokernelTester()
23266 .mr(3)
23267 .nr(4)
23268 .kr(2)
23269 .sr(4)
23270 .m(m)
23271 .n(n)
23272 .k(k)
23273 .iterations(1)
23274 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23275 }
23276 }
23277 }
23278 }
23279
23280 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8) {
23281 for (size_t k = 16; k <= 80; k += 8) {
23282 GemmMicrokernelTester()
23283 .mr(3)
23284 .nr(4)
23285 .kr(2)
23286 .sr(4)
23287 .m(3)
23288 .n(4)
23289 .k(k)
23290 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23291 }
23292 }
23293
23294 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
23295 for (size_t k = 16; k <= 80; k += 8) {
23296 for (uint32_t n = 1; n <= 4; n++) {
23297 for (uint32_t m = 1; m <= 3; m++) {
23298 GemmMicrokernelTester()
23299 .mr(3)
23300 .nr(4)
23301 .kr(2)
23302 .sr(4)
23303 .m(m)
23304 .n(n)
23305 .k(k)
23306 .iterations(1)
23307 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23308 }
23309 }
23310 }
23311 }
23312
23313 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4) {
23314 for (uint32_t n = 5; n < 8; n++) {
23315 for (size_t k = 1; k <= 40; k += 9) {
23316 GemmMicrokernelTester()
23317 .mr(3)
23318 .nr(4)
23319 .kr(2)
23320 .sr(4)
23321 .m(3)
23322 .n(n)
23323 .k(k)
23324 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23325 }
23326 }
23327 }
23328
23329 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
23330 for (uint32_t n = 5; n < 8; n++) {
23331 for (size_t k = 1; k <= 40; k += 9) {
23332 GemmMicrokernelTester()
23333 .mr(3)
23334 .nr(4)
23335 .kr(2)
23336 .sr(4)
23337 .m(3)
23338 .n(n)
23339 .k(k)
23340 .cn_stride(7)
23341 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23342 }
23343 }
23344 }
23345
23346 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
23347 for (uint32_t n = 5; n < 8; n++) {
23348 for (size_t k = 1; k <= 40; k += 9) {
23349 for (uint32_t m = 1; m <= 3; m++) {
23350 GemmMicrokernelTester()
23351 .mr(3)
23352 .nr(4)
23353 .kr(2)
23354 .sr(4)
23355 .m(m)
23356 .n(n)
23357 .k(k)
23358 .iterations(1)
23359 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23360 }
23361 }
23362 }
23363 }
23364
23365 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4) {
23366 for (uint32_t n = 8; n <= 12; n += 4) {
23367 for (size_t k = 1; k <= 40; k += 9) {
23368 GemmMicrokernelTester()
23369 .mr(3)
23370 .nr(4)
23371 .kr(2)
23372 .sr(4)
23373 .m(3)
23374 .n(n)
23375 .k(k)
23376 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23377 }
23378 }
23379 }
23380
23381 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
23382 for (uint32_t n = 8; n <= 12; n += 4) {
23383 for (size_t k = 1; k <= 40; k += 9) {
23384 GemmMicrokernelTester()
23385 .mr(3)
23386 .nr(4)
23387 .kr(2)
23388 .sr(4)
23389 .m(3)
23390 .n(n)
23391 .k(k)
23392 .cn_stride(7)
23393 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23394 }
23395 }
23396 }
23397
23398 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
23399 for (uint32_t n = 8; n <= 12; n += 4) {
23400 for (size_t k = 1; k <= 40; k += 9) {
23401 for (uint32_t m = 1; m <= 3; m++) {
23402 GemmMicrokernelTester()
23403 .mr(3)
23404 .nr(4)
23405 .kr(2)
23406 .sr(4)
23407 .m(m)
23408 .n(n)
23409 .k(k)
23410 .iterations(1)
23411 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23412 }
23413 }
23414 }
23415 }
23416
23417 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel) {
23418 for (size_t k = 1; k <= 40; k += 9) {
23419 GemmMicrokernelTester()
23420 .mr(3)
23421 .nr(4)
23422 .kr(2)
23423 .sr(4)
23424 .m(3)
23425 .n(4)
23426 .k(k)
23427 .ks(3)
23428 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23429 }
23430 }
23431
23432 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
23433 for (size_t k = 1; k <= 40; k += 9) {
23434 for (uint32_t n = 1; n <= 4; n++) {
23435 for (uint32_t m = 1; m <= 3; m++) {
23436 GemmMicrokernelTester()
23437 .mr(3)
23438 .nr(4)
23439 .kr(2)
23440 .sr(4)
23441 .m(m)
23442 .n(n)
23443 .k(k)
23444 .ks(3)
23445 .iterations(1)
23446 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23447 }
23448 }
23449 }
23450 }
23451
23452 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
23453 for (uint32_t n = 5; n < 8; n++) {
23454 for (size_t k = 1; k <= 40; k += 9) {
23455 GemmMicrokernelTester()
23456 .mr(3)
23457 .nr(4)
23458 .kr(2)
23459 .sr(4)
23460 .m(3)
23461 .n(n)
23462 .k(k)
23463 .ks(3)
23464 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23465 }
23466 }
23467 }
23468
23469 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
23470 for (uint32_t n = 8; n <= 12; n += 4) {
23471 for (size_t k = 1; k <= 40; k += 9) {
23472 GemmMicrokernelTester()
23473 .mr(3)
23474 .nr(4)
23475 .kr(2)
23476 .sr(4)
23477 .m(3)
23478 .n(n)
23479 .k(k)
23480 .ks(3)
23481 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23482 }
23483 }
23484 }
23485
23486 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
23487 for (size_t k = 1; k <= 40; k += 9) {
23488 for (uint32_t n = 1; n <= 4; n++) {
23489 for (uint32_t m = 1; m <= 3; m++) {
23490 GemmMicrokernelTester()
23491 .mr(3)
23492 .nr(4)
23493 .kr(2)
23494 .sr(4)
23495 .m(m)
23496 .n(n)
23497 .k(k)
23498 .cm_stride(7)
23499 .iterations(1)
23500 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23501 }
23502 }
23503 }
23504 }
23505
23506 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, a_offset) {
23507 for (size_t k = 1; k <= 40; k += 9) {
23508 GemmMicrokernelTester()
23509 .mr(3)
23510 .nr(4)
23511 .kr(2)
23512 .sr(4)
23513 .m(3)
23514 .n(4)
23515 .k(k)
23516 .ks(3)
23517 .a_offset(127)
23518 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23519 }
23520 }
23521
23522 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, zero) {
23523 for (size_t k = 1; k <= 40; k += 9) {
23524 for (uint32_t mz = 0; mz < 3; mz++) {
23525 GemmMicrokernelTester()
23526 .mr(3)
23527 .nr(4)
23528 .kr(2)
23529 .sr(4)
23530 .m(3)
23531 .n(4)
23532 .k(k)
23533 .ks(3)
23534 .a_offset(127)
23535 .zero_index(mz)
23536 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23537 }
23538 }
23539 }
23540
23541 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, qmin) {
23542 GemmMicrokernelTester()
23543 .mr(3)
23544 .nr(4)
23545 .kr(2)
23546 .sr(4)
23547 .m(3)
23548 .n(4)
23549 .k(8)
23550 .qmin(128)
23551 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23552 }
23553
23554 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, qmax) {
23555 GemmMicrokernelTester()
23556 .mr(3)
23557 .nr(4)
23558 .kr(2)
23559 .sr(4)
23560 .m(3)
23561 .n(4)
23562 .k(8)
23563 .qmax(128)
23564 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23565 }
23566
23567 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm) {
23568 GemmMicrokernelTester()
23569 .mr(3)
23570 .nr(4)
23571 .kr(2)
23572 .sr(4)
23573 .m(3)
23574 .n(4)
23575 .k(8)
23576 .cm_stride(7)
23577 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23578 }
23579
23580 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
23581 for (size_t k = 1; k <= 40; k += 9) {
23582 GemmMicrokernelTester()
23583 .mr(3)
23584 .nr(4)
23585 .kr(2)
23586 .sr(4)
23587 .m(3)
23588 .n(4)
23589 .k(k)
23590 .a_zero_point(0)
23591 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23592 }
23593 }
23594
23595 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
23596 for (size_t k = 1; k <= 40; k += 9) {
23597 GemmMicrokernelTester()
23598 .mr(3)
23599 .nr(4)
23600 .kr(2)
23601 .sr(4)
23602 .m(3)
23603 .n(4)
23604 .k(k)
23605 .b_zero_point(0)
23606 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23607 }
23608 }
23609
23610 TEST(QU8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, no_zero_point) {
23611 for (size_t k = 1; k <= 40; k += 9) {
23612 GemmMicrokernelTester()
23613 .mr(3)
23614 .nr(4)
23615 .kr(2)
23616 .sr(4)
23617 .m(3)
23618 .n(4)
23619 .k(k)
23620 .a_zero_point(0)
23621 .b_zero_point(0)
23622 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23623 }
23624 }
23625#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
23626
23627
23628#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
23629 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8) {
23630 GemmMicrokernelTester()
23631 .mr(4)
23632 .nr(4)
23633 .kr(2)
23634 .sr(4)
23635 .m(4)
23636 .n(4)
23637 .k(8)
23638 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23639 }
23640
23641 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cn) {
23642 GemmMicrokernelTester()
23643 .mr(4)
23644 .nr(4)
23645 .kr(2)
23646 .sr(4)
23647 .m(4)
23648 .n(4)
23649 .k(8)
23650 .cn_stride(7)
23651 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23652 }
23653
23654 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
23655 for (uint32_t n = 1; n <= 4; n++) {
23656 for (uint32_t m = 1; m <= 4; m++) {
23657 GemmMicrokernelTester()
23658 .mr(4)
23659 .nr(4)
23660 .kr(2)
23661 .sr(4)
23662 .m(m)
23663 .n(n)
23664 .k(8)
23665 .iterations(1)
23666 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23667 }
23668 }
23669 }
23670
23671 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
23672 for (uint32_t m = 1; m <= 4; m++) {
23673 GemmMicrokernelTester()
23674 .mr(4)
23675 .nr(4)
23676 .kr(2)
23677 .sr(4)
23678 .m(m)
23679 .n(4)
23680 .k(8)
23681 .iterations(1)
23682 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23683 }
23684 }
23685
23686 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
23687 for (uint32_t n = 1; n <= 4; n++) {
23688 GemmMicrokernelTester()
23689 .mr(4)
23690 .nr(4)
23691 .kr(2)
23692 .sr(4)
23693 .m(4)
23694 .n(n)
23695 .k(8)
23696 .iterations(1)
23697 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23698 }
23699 }
23700
23701 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8) {
23702 for (size_t k = 1; k < 8; k++) {
23703 GemmMicrokernelTester()
23704 .mr(4)
23705 .nr(4)
23706 .kr(2)
23707 .sr(4)
23708 .m(4)
23709 .n(4)
23710 .k(k)
23711 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23712 }
23713 }
23714
23715 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
23716 for (size_t k = 1; k < 8; k++) {
23717 for (uint32_t n = 1; n <= 4; n++) {
23718 for (uint32_t m = 1; m <= 4; m++) {
23719 GemmMicrokernelTester()
23720 .mr(4)
23721 .nr(4)
23722 .kr(2)
23723 .sr(4)
23724 .m(m)
23725 .n(n)
23726 .k(k)
23727 .iterations(1)
23728 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23729 }
23730 }
23731 }
23732 }
23733
23734 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8) {
23735 for (size_t k = 9; k < 16; k++) {
23736 GemmMicrokernelTester()
23737 .mr(4)
23738 .nr(4)
23739 .kr(2)
23740 .sr(4)
23741 .m(4)
23742 .n(4)
23743 .k(k)
23744 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23745 }
23746 }
23747
23748 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
23749 for (size_t k = 9; k < 16; k++) {
23750 for (uint32_t n = 1; n <= 4; n++) {
23751 for (uint32_t m = 1; m <= 4; m++) {
23752 GemmMicrokernelTester()
23753 .mr(4)
23754 .nr(4)
23755 .kr(2)
23756 .sr(4)
23757 .m(m)
23758 .n(n)
23759 .k(k)
23760 .iterations(1)
23761 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23762 }
23763 }
23764 }
23765 }
23766
23767 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8) {
23768 for (size_t k = 16; k <= 80; k += 8) {
23769 GemmMicrokernelTester()
23770 .mr(4)
23771 .nr(4)
23772 .kr(2)
23773 .sr(4)
23774 .m(4)
23775 .n(4)
23776 .k(k)
23777 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23778 }
23779 }
23780
23781 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
23782 for (size_t k = 16; k <= 80; k += 8) {
23783 for (uint32_t n = 1; n <= 4; n++) {
23784 for (uint32_t m = 1; m <= 4; m++) {
23785 GemmMicrokernelTester()
23786 .mr(4)
23787 .nr(4)
23788 .kr(2)
23789 .sr(4)
23790 .m(m)
23791 .n(n)
23792 .k(k)
23793 .iterations(1)
23794 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23795 }
23796 }
23797 }
23798 }
23799
23800 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4) {
23801 for (uint32_t n = 5; n < 8; n++) {
23802 for (size_t k = 1; k <= 40; k += 9) {
23803 GemmMicrokernelTester()
23804 .mr(4)
23805 .nr(4)
23806 .kr(2)
23807 .sr(4)
23808 .m(4)
23809 .n(n)
23810 .k(k)
23811 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23812 }
23813 }
23814 }
23815
23816 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
23817 for (uint32_t n = 5; n < 8; n++) {
23818 for (size_t k = 1; k <= 40; k += 9) {
23819 GemmMicrokernelTester()
23820 .mr(4)
23821 .nr(4)
23822 .kr(2)
23823 .sr(4)
23824 .m(4)
23825 .n(n)
23826 .k(k)
23827 .cn_stride(7)
23828 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23829 }
23830 }
23831 }
23832
23833 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
23834 for (uint32_t n = 5; n < 8; n++) {
23835 for (size_t k = 1; k <= 40; k += 9) {
23836 for (uint32_t m = 1; m <= 4; m++) {
23837 GemmMicrokernelTester()
23838 .mr(4)
23839 .nr(4)
23840 .kr(2)
23841 .sr(4)
23842 .m(m)
23843 .n(n)
23844 .k(k)
23845 .iterations(1)
23846 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23847 }
23848 }
23849 }
23850 }
23851
23852 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4) {
23853 for (uint32_t n = 8; n <= 12; n += 4) {
23854 for (size_t k = 1; k <= 40; k += 9) {
23855 GemmMicrokernelTester()
23856 .mr(4)
23857 .nr(4)
23858 .kr(2)
23859 .sr(4)
23860 .m(4)
23861 .n(n)
23862 .k(k)
23863 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23864 }
23865 }
23866 }
23867
23868 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
23869 for (uint32_t n = 8; n <= 12; n += 4) {
23870 for (size_t k = 1; k <= 40; k += 9) {
23871 GemmMicrokernelTester()
23872 .mr(4)
23873 .nr(4)
23874 .kr(2)
23875 .sr(4)
23876 .m(4)
23877 .n(n)
23878 .k(k)
23879 .cn_stride(7)
23880 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23881 }
23882 }
23883 }
23884
23885 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
23886 for (uint32_t n = 8; n <= 12; n += 4) {
23887 for (size_t k = 1; k <= 40; k += 9) {
23888 for (uint32_t m = 1; m <= 4; m++) {
23889 GemmMicrokernelTester()
23890 .mr(4)
23891 .nr(4)
23892 .kr(2)
23893 .sr(4)
23894 .m(m)
23895 .n(n)
23896 .k(k)
23897 .iterations(1)
23898 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23899 }
23900 }
23901 }
23902 }
23903
23904 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel) {
23905 for (size_t k = 1; k <= 40; k += 9) {
23906 GemmMicrokernelTester()
23907 .mr(4)
23908 .nr(4)
23909 .kr(2)
23910 .sr(4)
23911 .m(4)
23912 .n(4)
23913 .k(k)
23914 .ks(3)
23915 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23916 }
23917 }
23918
23919 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
23920 for (size_t k = 1; k <= 40; k += 9) {
23921 for (uint32_t n = 1; n <= 4; n++) {
23922 for (uint32_t m = 1; m <= 4; m++) {
23923 GemmMicrokernelTester()
23924 .mr(4)
23925 .nr(4)
23926 .kr(2)
23927 .sr(4)
23928 .m(m)
23929 .n(n)
23930 .k(k)
23931 .ks(3)
23932 .iterations(1)
23933 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23934 }
23935 }
23936 }
23937 }
23938
23939 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
23940 for (uint32_t n = 5; n < 8; n++) {
23941 for (size_t k = 1; k <= 40; k += 9) {
23942 GemmMicrokernelTester()
23943 .mr(4)
23944 .nr(4)
23945 .kr(2)
23946 .sr(4)
23947 .m(4)
23948 .n(n)
23949 .k(k)
23950 .ks(3)
23951 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23952 }
23953 }
23954 }
23955
23956 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
23957 for (uint32_t n = 8; n <= 12; n += 4) {
23958 for (size_t k = 1; k <= 40; k += 9) {
23959 GemmMicrokernelTester()
23960 .mr(4)
23961 .nr(4)
23962 .kr(2)
23963 .sr(4)
23964 .m(4)
23965 .n(n)
23966 .k(k)
23967 .ks(3)
23968 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23969 }
23970 }
23971 }
23972
23973 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
23974 for (size_t k = 1; k <= 40; k += 9) {
23975 for (uint32_t n = 1; n <= 4; n++) {
23976 for (uint32_t m = 1; m <= 4; m++) {
23977 GemmMicrokernelTester()
23978 .mr(4)
23979 .nr(4)
23980 .kr(2)
23981 .sr(4)
23982 .m(m)
23983 .n(n)
23984 .k(k)
23985 .cm_stride(7)
23986 .iterations(1)
23987 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
23988 }
23989 }
23990 }
23991 }
23992
23993 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, a_offset) {
23994 for (size_t k = 1; k <= 40; k += 9) {
23995 GemmMicrokernelTester()
23996 .mr(4)
23997 .nr(4)
23998 .kr(2)
23999 .sr(4)
24000 .m(4)
24001 .n(4)
24002 .k(k)
24003 .ks(3)
24004 .a_offset(163)
24005 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24006 }
24007 }
24008
24009 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, zero) {
24010 for (size_t k = 1; k <= 40; k += 9) {
24011 for (uint32_t mz = 0; mz < 4; mz++) {
24012 GemmMicrokernelTester()
24013 .mr(4)
24014 .nr(4)
24015 .kr(2)
24016 .sr(4)
24017 .m(4)
24018 .n(4)
24019 .k(k)
24020 .ks(3)
24021 .a_offset(163)
24022 .zero_index(mz)
24023 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24024 }
24025 }
24026 }
24027
24028 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, qmin) {
24029 GemmMicrokernelTester()
24030 .mr(4)
24031 .nr(4)
24032 .kr(2)
24033 .sr(4)
24034 .m(4)
24035 .n(4)
24036 .k(8)
24037 .qmin(128)
24038 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24039 }
24040
24041 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, qmax) {
24042 GemmMicrokernelTester()
24043 .mr(4)
24044 .nr(4)
24045 .kr(2)
24046 .sr(4)
24047 .m(4)
24048 .n(4)
24049 .k(8)
24050 .qmax(128)
24051 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24052 }
24053
24054 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm) {
24055 GemmMicrokernelTester()
24056 .mr(4)
24057 .nr(4)
24058 .kr(2)
24059 .sr(4)
24060 .m(4)
24061 .n(4)
24062 .k(8)
24063 .cm_stride(7)
24064 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24065 }
24066
24067 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
24068 for (size_t k = 1; k <= 40; k += 9) {
24069 GemmMicrokernelTester()
24070 .mr(4)
24071 .nr(4)
24072 .kr(2)
24073 .sr(4)
24074 .m(4)
24075 .n(4)
24076 .k(k)
24077 .a_zero_point(0)
24078 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24079 }
24080 }
24081
24082 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
24083 for (size_t k = 1; k <= 40; k += 9) {
24084 GemmMicrokernelTester()
24085 .mr(4)
24086 .nr(4)
24087 .kr(2)
24088 .sr(4)
24089 .m(4)
24090 .n(4)
24091 .k(k)
24092 .b_zero_point(0)
24093 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24094 }
24095 }
24096
24097 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, no_zero_point) {
24098 for (size_t k = 1; k <= 40; k += 9) {
24099 GemmMicrokernelTester()
24100 .mr(4)
24101 .nr(4)
24102 .kr(2)
24103 .sr(4)
24104 .m(4)
24105 .n(4)
24106 .k(k)
24107 .a_zero_point(0)
24108 .b_zero_point(0)
24109 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24110 }
24111 }
24112#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
24113
24114
24115#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
24116 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
24117 GemmMicrokernelTester()
24118 .mr(2)
24119 .nr(4)
24120 .kr(2)
24121 .sr(4)
24122 .m(2)
24123 .n(4)
24124 .k(8)
24125 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24126 }
24127
24128 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
24129 GemmMicrokernelTester()
24130 .mr(2)
24131 .nr(4)
24132 .kr(2)
24133 .sr(4)
24134 .m(2)
24135 .n(4)
24136 .k(8)
24137 .cn_stride(7)
24138 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24139 }
24140
24141 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
24142 for (uint32_t n = 1; n <= 4; n++) {
24143 for (uint32_t m = 1; m <= 2; m++) {
24144 GemmMicrokernelTester()
24145 .mr(2)
24146 .nr(4)
24147 .kr(2)
24148 .sr(4)
24149 .m(m)
24150 .n(n)
24151 .k(8)
24152 .iterations(1)
24153 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24154 }
24155 }
24156 }
24157
24158 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
24159 for (uint32_t m = 1; m <= 2; m++) {
24160 GemmMicrokernelTester()
24161 .mr(2)
24162 .nr(4)
24163 .kr(2)
24164 .sr(4)
24165 .m(m)
24166 .n(4)
24167 .k(8)
24168 .iterations(1)
24169 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24170 }
24171 }
24172
24173 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
24174 for (uint32_t n = 1; n <= 4; n++) {
24175 GemmMicrokernelTester()
24176 .mr(2)
24177 .nr(4)
24178 .kr(2)
24179 .sr(4)
24180 .m(2)
24181 .n(n)
24182 .k(8)
24183 .iterations(1)
24184 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24185 }
24186 }
24187
24188 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
24189 for (size_t k = 1; k < 8; k++) {
24190 GemmMicrokernelTester()
24191 .mr(2)
24192 .nr(4)
24193 .kr(2)
24194 .sr(4)
24195 .m(2)
24196 .n(4)
24197 .k(k)
24198 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24199 }
24200 }
24201
24202 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
24203 for (size_t k = 1; k < 8; k++) {
24204 for (uint32_t n = 1; n <= 4; n++) {
24205 for (uint32_t m = 1; m <= 2; m++) {
24206 GemmMicrokernelTester()
24207 .mr(2)
24208 .nr(4)
24209 .kr(2)
24210 .sr(4)
24211 .m(m)
24212 .n(n)
24213 .k(k)
24214 .iterations(1)
24215 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24216 }
24217 }
24218 }
24219 }
24220
24221 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
24222 for (size_t k = 9; k < 16; k++) {
24223 GemmMicrokernelTester()
24224 .mr(2)
24225 .nr(4)
24226 .kr(2)
24227 .sr(4)
24228 .m(2)
24229 .n(4)
24230 .k(k)
24231 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24232 }
24233 }
24234
24235 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
24236 for (size_t k = 9; k < 16; k++) {
24237 for (uint32_t n = 1; n <= 4; n++) {
24238 for (uint32_t m = 1; m <= 2; m++) {
24239 GemmMicrokernelTester()
24240 .mr(2)
24241 .nr(4)
24242 .kr(2)
24243 .sr(4)
24244 .m(m)
24245 .n(n)
24246 .k(k)
24247 .iterations(1)
24248 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24249 }
24250 }
24251 }
24252 }
24253
24254 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
24255 for (size_t k = 16; k <= 80; k += 8) {
24256 GemmMicrokernelTester()
24257 .mr(2)
24258 .nr(4)
24259 .kr(2)
24260 .sr(4)
24261 .m(2)
24262 .n(4)
24263 .k(k)
24264 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24265 }
24266 }
24267
24268 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
24269 for (size_t k = 16; k <= 80; k += 8) {
24270 for (uint32_t n = 1; n <= 4; n++) {
24271 for (uint32_t m = 1; m <= 2; m++) {
24272 GemmMicrokernelTester()
24273 .mr(2)
24274 .nr(4)
24275 .kr(2)
24276 .sr(4)
24277 .m(m)
24278 .n(n)
24279 .k(k)
24280 .iterations(1)
24281 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24282 }
24283 }
24284 }
24285 }
24286
24287 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
24288 for (uint32_t n = 5; n < 8; n++) {
24289 for (size_t k = 1; k <= 40; k += 9) {
24290 GemmMicrokernelTester()
24291 .mr(2)
24292 .nr(4)
24293 .kr(2)
24294 .sr(4)
24295 .m(2)
24296 .n(n)
24297 .k(k)
24298 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24299 }
24300 }
24301 }
24302
24303 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
24304 for (uint32_t n = 5; n < 8; n++) {
24305 for (size_t k = 1; k <= 40; k += 9) {
24306 GemmMicrokernelTester()
24307 .mr(2)
24308 .nr(4)
24309 .kr(2)
24310 .sr(4)
24311 .m(2)
24312 .n(n)
24313 .k(k)
24314 .cn_stride(7)
24315 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24316 }
24317 }
24318 }
24319
24320 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
24321 for (uint32_t n = 5; n < 8; n++) {
24322 for (size_t k = 1; k <= 40; k += 9) {
24323 for (uint32_t m = 1; m <= 2; m++) {
24324 GemmMicrokernelTester()
24325 .mr(2)
24326 .nr(4)
24327 .kr(2)
24328 .sr(4)
24329 .m(m)
24330 .n(n)
24331 .k(k)
24332 .iterations(1)
24333 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24334 }
24335 }
24336 }
24337 }
24338
24339 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
24340 for (uint32_t n = 8; n <= 12; n += 4) {
24341 for (size_t k = 1; k <= 40; k += 9) {
24342 GemmMicrokernelTester()
24343 .mr(2)
24344 .nr(4)
24345 .kr(2)
24346 .sr(4)
24347 .m(2)
24348 .n(n)
24349 .k(k)
24350 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24351 }
24352 }
24353 }
24354
24355 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
24356 for (uint32_t n = 8; n <= 12; n += 4) {
24357 for (size_t k = 1; k <= 40; k += 9) {
24358 GemmMicrokernelTester()
24359 .mr(2)
24360 .nr(4)
24361 .kr(2)
24362 .sr(4)
24363 .m(2)
24364 .n(n)
24365 .k(k)
24366 .cn_stride(7)
24367 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24368 }
24369 }
24370 }
24371
24372 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
24373 for (uint32_t n = 8; n <= 12; n += 4) {
24374 for (size_t k = 1; k <= 40; k += 9) {
24375 for (uint32_t m = 1; m <= 2; m++) {
24376 GemmMicrokernelTester()
24377 .mr(2)
24378 .nr(4)
24379 .kr(2)
24380 .sr(4)
24381 .m(m)
24382 .n(n)
24383 .k(k)
24384 .iterations(1)
24385 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24386 }
24387 }
24388 }
24389 }
24390
24391 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel) {
24392 for (size_t k = 1; k <= 40; k += 9) {
24393 GemmMicrokernelTester()
24394 .mr(2)
24395 .nr(4)
24396 .kr(2)
24397 .sr(4)
24398 .m(2)
24399 .n(4)
24400 .k(k)
24401 .ks(3)
24402 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24403 }
24404 }
24405
24406 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
24407 for (size_t k = 1; k <= 40; k += 9) {
24408 for (uint32_t n = 1; n <= 4; n++) {
24409 for (uint32_t m = 1; m <= 2; m++) {
24410 GemmMicrokernelTester()
24411 .mr(2)
24412 .nr(4)
24413 .kr(2)
24414 .sr(4)
24415 .m(m)
24416 .n(n)
24417 .k(k)
24418 .ks(3)
24419 .iterations(1)
24420 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24421 }
24422 }
24423 }
24424 }
24425
24426 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
24427 for (uint32_t n = 5; n < 8; n++) {
24428 for (size_t k = 1; k <= 40; k += 9) {
24429 GemmMicrokernelTester()
24430 .mr(2)
24431 .nr(4)
24432 .kr(2)
24433 .sr(4)
24434 .m(2)
24435 .n(n)
24436 .k(k)
24437 .ks(3)
24438 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24439 }
24440 }
24441 }
24442
24443 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
24444 for (uint32_t n = 8; n <= 12; n += 4) {
24445 for (size_t k = 1; k <= 40; k += 9) {
24446 GemmMicrokernelTester()
24447 .mr(2)
24448 .nr(4)
24449 .kr(2)
24450 .sr(4)
24451 .m(2)
24452 .n(n)
24453 .k(k)
24454 .ks(3)
24455 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24456 }
24457 }
24458 }
24459
24460 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
24461 for (size_t k = 1; k <= 40; k += 9) {
24462 for (uint32_t n = 1; n <= 4; n++) {
24463 for (uint32_t m = 1; m <= 2; m++) {
24464 GemmMicrokernelTester()
24465 .mr(2)
24466 .nr(4)
24467 .kr(2)
24468 .sr(4)
24469 .m(m)
24470 .n(n)
24471 .k(k)
24472 .cm_stride(7)
24473 .iterations(1)
24474 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24475 }
24476 }
24477 }
24478 }
24479
24480 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, a_offset) {
24481 for (size_t k = 1; k <= 40; k += 9) {
24482 GemmMicrokernelTester()
24483 .mr(2)
24484 .nr(4)
24485 .kr(2)
24486 .sr(4)
24487 .m(2)
24488 .n(4)
24489 .k(k)
24490 .ks(3)
24491 .a_offset(83)
24492 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24493 }
24494 }
24495
24496 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, zero) {
24497 for (size_t k = 1; k <= 40; k += 9) {
24498 for (uint32_t mz = 0; mz < 2; mz++) {
24499 GemmMicrokernelTester()
24500 .mr(2)
24501 .nr(4)
24502 .kr(2)
24503 .sr(4)
24504 .m(2)
24505 .n(4)
24506 .k(k)
24507 .ks(3)
24508 .a_offset(83)
24509 .zero_index(mz)
24510 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24511 }
24512 }
24513 }
24514
24515 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
24516 GemmMicrokernelTester()
24517 .mr(2)
24518 .nr(4)
24519 .kr(2)
24520 .sr(4)
24521 .m(2)
24522 .n(4)
24523 .k(8)
24524 .qmin(128)
24525 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24526 }
24527
24528 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
24529 GemmMicrokernelTester()
24530 .mr(2)
24531 .nr(4)
24532 .kr(2)
24533 .sr(4)
24534 .m(2)
24535 .n(4)
24536 .k(8)
24537 .qmax(128)
24538 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24539 }
24540
24541 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
24542 GemmMicrokernelTester()
24543 .mr(2)
24544 .nr(4)
24545 .kr(2)
24546 .sr(4)
24547 .m(2)
24548 .n(4)
24549 .k(8)
24550 .cm_stride(7)
24551 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24552 }
24553
24554 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
24555 for (size_t k = 1; k <= 40; k += 9) {
24556 GemmMicrokernelTester()
24557 .mr(2)
24558 .nr(4)
24559 .kr(2)
24560 .sr(4)
24561 .m(2)
24562 .n(4)
24563 .k(k)
24564 .a_zero_point(0)
24565 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24566 }
24567 }
24568
24569 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
24570 for (size_t k = 1; k <= 40; k += 9) {
24571 GemmMicrokernelTester()
24572 .mr(2)
24573 .nr(4)
24574 .kr(2)
24575 .sr(4)
24576 .m(2)
24577 .n(4)
24578 .k(k)
24579 .b_zero_point(0)
24580 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24581 }
24582 }
24583
24584 TEST(QU8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, no_zero_point) {
24585 for (size_t k = 1; k <= 40; k += 9) {
24586 GemmMicrokernelTester()
24587 .mr(2)
24588 .nr(4)
24589 .kr(2)
24590 .sr(4)
24591 .m(2)
24592 .n(4)
24593 .k(k)
24594 .a_zero_point(0)
24595 .b_zero_point(0)
24596 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24597 }
24598 }
24599#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
24600
24601
24602#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
24603 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
24604 GemmMicrokernelTester()
24605 .mr(4)
24606 .nr(4)
24607 .kr(2)
24608 .sr(4)
24609 .m(4)
24610 .n(4)
24611 .k(8)
24612 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24613 }
24614
24615 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
24616 GemmMicrokernelTester()
24617 .mr(4)
24618 .nr(4)
24619 .kr(2)
24620 .sr(4)
24621 .m(4)
24622 .n(4)
24623 .k(8)
24624 .cn_stride(7)
24625 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24626 }
24627
24628 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
24629 for (uint32_t n = 1; n <= 4; n++) {
24630 for (uint32_t m = 1; m <= 4; m++) {
24631 GemmMicrokernelTester()
24632 .mr(4)
24633 .nr(4)
24634 .kr(2)
24635 .sr(4)
24636 .m(m)
24637 .n(n)
24638 .k(8)
24639 .iterations(1)
24640 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24641 }
24642 }
24643 }
24644
24645 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
24646 for (uint32_t m = 1; m <= 4; m++) {
24647 GemmMicrokernelTester()
24648 .mr(4)
24649 .nr(4)
24650 .kr(2)
24651 .sr(4)
24652 .m(m)
24653 .n(4)
24654 .k(8)
24655 .iterations(1)
24656 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24657 }
24658 }
24659
24660 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
24661 for (uint32_t n = 1; n <= 4; n++) {
24662 GemmMicrokernelTester()
24663 .mr(4)
24664 .nr(4)
24665 .kr(2)
24666 .sr(4)
24667 .m(4)
24668 .n(n)
24669 .k(8)
24670 .iterations(1)
24671 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24672 }
24673 }
24674
24675 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
24676 for (size_t k = 1; k < 8; k++) {
24677 GemmMicrokernelTester()
24678 .mr(4)
24679 .nr(4)
24680 .kr(2)
24681 .sr(4)
24682 .m(4)
24683 .n(4)
24684 .k(k)
24685 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24686 }
24687 }
24688
24689 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
24690 for (size_t k = 1; k < 8; k++) {
24691 for (uint32_t n = 1; n <= 4; n++) {
24692 for (uint32_t m = 1; m <= 4; m++) {
24693 GemmMicrokernelTester()
24694 .mr(4)
24695 .nr(4)
24696 .kr(2)
24697 .sr(4)
24698 .m(m)
24699 .n(n)
24700 .k(k)
24701 .iterations(1)
24702 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24703 }
24704 }
24705 }
24706 }
24707
24708 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
24709 for (size_t k = 9; k < 16; k++) {
24710 GemmMicrokernelTester()
24711 .mr(4)
24712 .nr(4)
24713 .kr(2)
24714 .sr(4)
24715 .m(4)
24716 .n(4)
24717 .k(k)
24718 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24719 }
24720 }
24721
24722 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
24723 for (size_t k = 9; k < 16; k++) {
24724 for (uint32_t n = 1; n <= 4; n++) {
24725 for (uint32_t m = 1; m <= 4; m++) {
24726 GemmMicrokernelTester()
24727 .mr(4)
24728 .nr(4)
24729 .kr(2)
24730 .sr(4)
24731 .m(m)
24732 .n(n)
24733 .k(k)
24734 .iterations(1)
24735 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24736 }
24737 }
24738 }
24739 }
24740
24741 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
24742 for (size_t k = 16; k <= 80; k += 8) {
24743 GemmMicrokernelTester()
24744 .mr(4)
24745 .nr(4)
24746 .kr(2)
24747 .sr(4)
24748 .m(4)
24749 .n(4)
24750 .k(k)
24751 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24752 }
24753 }
24754
24755 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
24756 for (size_t k = 16; k <= 80; k += 8) {
24757 for (uint32_t n = 1; n <= 4; n++) {
24758 for (uint32_t m = 1; m <= 4; m++) {
24759 GemmMicrokernelTester()
24760 .mr(4)
24761 .nr(4)
24762 .kr(2)
24763 .sr(4)
24764 .m(m)
24765 .n(n)
24766 .k(k)
24767 .iterations(1)
24768 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24769 }
24770 }
24771 }
24772 }
24773
24774 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
24775 for (uint32_t n = 5; n < 8; n++) {
24776 for (size_t k = 1; k <= 40; k += 9) {
24777 GemmMicrokernelTester()
24778 .mr(4)
24779 .nr(4)
24780 .kr(2)
24781 .sr(4)
24782 .m(4)
24783 .n(n)
24784 .k(k)
24785 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24786 }
24787 }
24788 }
24789
24790 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
24791 for (uint32_t n = 5; n < 8; n++) {
24792 for (size_t k = 1; k <= 40; k += 9) {
24793 GemmMicrokernelTester()
24794 .mr(4)
24795 .nr(4)
24796 .kr(2)
24797 .sr(4)
24798 .m(4)
24799 .n(n)
24800 .k(k)
24801 .cn_stride(7)
24802 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24803 }
24804 }
24805 }
24806
24807 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
24808 for (uint32_t n = 5; n < 8; n++) {
24809 for (size_t k = 1; k <= 40; k += 9) {
24810 for (uint32_t m = 1; m <= 4; m++) {
24811 GemmMicrokernelTester()
24812 .mr(4)
24813 .nr(4)
24814 .kr(2)
24815 .sr(4)
24816 .m(m)
24817 .n(n)
24818 .k(k)
24819 .iterations(1)
24820 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24821 }
24822 }
24823 }
24824 }
24825
24826 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
24827 for (uint32_t n = 8; n <= 12; n += 4) {
24828 for (size_t k = 1; k <= 40; k += 9) {
24829 GemmMicrokernelTester()
24830 .mr(4)
24831 .nr(4)
24832 .kr(2)
24833 .sr(4)
24834 .m(4)
24835 .n(n)
24836 .k(k)
24837 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24838 }
24839 }
24840 }
24841
24842 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
24843 for (uint32_t n = 8; n <= 12; n += 4) {
24844 for (size_t k = 1; k <= 40; k += 9) {
24845 GemmMicrokernelTester()
24846 .mr(4)
24847 .nr(4)
24848 .kr(2)
24849 .sr(4)
24850 .m(4)
24851 .n(n)
24852 .k(k)
24853 .cn_stride(7)
24854 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24855 }
24856 }
24857 }
24858
24859 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
24860 for (uint32_t n = 8; n <= 12; n += 4) {
24861 for (size_t k = 1; k <= 40; k += 9) {
24862 for (uint32_t m = 1; m <= 4; m++) {
24863 GemmMicrokernelTester()
24864 .mr(4)
24865 .nr(4)
24866 .kr(2)
24867 .sr(4)
24868 .m(m)
24869 .n(n)
24870 .k(k)
24871 .iterations(1)
24872 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24873 }
24874 }
24875 }
24876 }
24877
24878 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel) {
24879 for (size_t k = 1; k <= 40; k += 9) {
24880 GemmMicrokernelTester()
24881 .mr(4)
24882 .nr(4)
24883 .kr(2)
24884 .sr(4)
24885 .m(4)
24886 .n(4)
24887 .k(k)
24888 .ks(3)
24889 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24890 }
24891 }
24892
24893 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
24894 for (size_t k = 1; k <= 40; k += 9) {
24895 for (uint32_t n = 1; n <= 4; n++) {
24896 for (uint32_t m = 1; m <= 4; m++) {
24897 GemmMicrokernelTester()
24898 .mr(4)
24899 .nr(4)
24900 .kr(2)
24901 .sr(4)
24902 .m(m)
24903 .n(n)
24904 .k(k)
24905 .ks(3)
24906 .iterations(1)
24907 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24908 }
24909 }
24910 }
24911 }
24912
24913 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
24914 for (uint32_t n = 5; n < 8; n++) {
24915 for (size_t k = 1; k <= 40; k += 9) {
24916 GemmMicrokernelTester()
24917 .mr(4)
24918 .nr(4)
24919 .kr(2)
24920 .sr(4)
24921 .m(4)
24922 .n(n)
24923 .k(k)
24924 .ks(3)
24925 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24926 }
24927 }
24928 }
24929
24930 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
24931 for (uint32_t n = 8; n <= 12; n += 4) {
24932 for (size_t k = 1; k <= 40; k += 9) {
24933 GemmMicrokernelTester()
24934 .mr(4)
24935 .nr(4)
24936 .kr(2)
24937 .sr(4)
24938 .m(4)
24939 .n(n)
24940 .k(k)
24941 .ks(3)
24942 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24943 }
24944 }
24945 }
24946
24947 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
24948 for (size_t k = 1; k <= 40; k += 9) {
24949 for (uint32_t n = 1; n <= 4; n++) {
24950 for (uint32_t m = 1; m <= 4; m++) {
24951 GemmMicrokernelTester()
24952 .mr(4)
24953 .nr(4)
24954 .kr(2)
24955 .sr(4)
24956 .m(m)
24957 .n(n)
24958 .k(k)
24959 .cm_stride(7)
24960 .iterations(1)
24961 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24962 }
24963 }
24964 }
24965 }
24966
24967 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, a_offset) {
24968 for (size_t k = 1; k <= 40; k += 9) {
24969 GemmMicrokernelTester()
24970 .mr(4)
24971 .nr(4)
24972 .kr(2)
24973 .sr(4)
24974 .m(4)
24975 .n(4)
24976 .k(k)
24977 .ks(3)
24978 .a_offset(163)
24979 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24980 }
24981 }
24982
24983 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, zero) {
24984 for (size_t k = 1; k <= 40; k += 9) {
24985 for (uint32_t mz = 0; mz < 4; mz++) {
24986 GemmMicrokernelTester()
24987 .mr(4)
24988 .nr(4)
24989 .kr(2)
24990 .sr(4)
24991 .m(4)
24992 .n(4)
24993 .k(k)
24994 .ks(3)
24995 .a_offset(163)
24996 .zero_index(mz)
24997 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
24998 }
24999 }
25000 }
25001
25002 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
25003 GemmMicrokernelTester()
25004 .mr(4)
25005 .nr(4)
25006 .kr(2)
25007 .sr(4)
25008 .m(4)
25009 .n(4)
25010 .k(8)
25011 .qmin(128)
25012 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
25013 }
25014
25015 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
25016 GemmMicrokernelTester()
25017 .mr(4)
25018 .nr(4)
25019 .kr(2)
25020 .sr(4)
25021 .m(4)
25022 .n(4)
25023 .k(8)
25024 .qmax(128)
25025 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
25026 }
25027
25028 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
25029 GemmMicrokernelTester()
25030 .mr(4)
25031 .nr(4)
25032 .kr(2)
25033 .sr(4)
25034 .m(4)
25035 .n(4)
25036 .k(8)
25037 .cm_stride(7)
25038 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
25039 }
25040
25041 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
25042 for (size_t k = 1; k <= 40; k += 9) {
25043 GemmMicrokernelTester()
25044 .mr(4)
25045 .nr(4)
25046 .kr(2)
25047 .sr(4)
25048 .m(4)
25049 .n(4)
25050 .k(k)
25051 .a_zero_point(0)
25052 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
25053 }
25054 }
25055
25056 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
25057 for (size_t k = 1; k <= 40; k += 9) {
25058 GemmMicrokernelTester()
25059 .mr(4)
25060 .nr(4)
25061 .kr(2)
25062 .sr(4)
25063 .m(4)
25064 .n(4)
25065 .k(k)
25066 .b_zero_point(0)
25067 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
25068 }
25069 }
25070
25071 TEST(QU8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, no_zero_point) {
25072 for (size_t k = 1; k <= 40; k += 9) {
25073 GemmMicrokernelTester()
25074 .mr(4)
25075 .nr(4)
25076 .kr(2)
25077 .sr(4)
25078 .m(4)
25079 .n(4)
25080 .k(k)
25081 .a_zero_point(0)
25082 .b_zero_point(0)
25083 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
25084 }
25085 }
25086#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
25087
25088
25089#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025090 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
25091 GemmMicrokernelTester()
25092 .mr(2)
25093 .nr(4)
25094 .kr(8)
25095 .sr(1)
25096 .m(2)
25097 .n(4)
25098 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080025099 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025100 }
25101
25102 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
25103 GemmMicrokernelTester()
25104 .mr(2)
25105 .nr(4)
25106 .kr(8)
25107 .sr(1)
25108 .m(2)
25109 .n(4)
25110 .k(8)
25111 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025112 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025113 }
25114
25115 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025116 for (uint32_t n = 1; n <= 4; n++) {
25117 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025118 GemmMicrokernelTester()
25119 .mr(2)
25120 .nr(4)
25121 .kr(8)
25122 .sr(1)
25123 .m(m)
25124 .n(n)
25125 .k(8)
25126 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025127 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025128 }
25129 }
25130 }
25131
25132 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
25133 for (uint32_t m = 1; m <= 2; m++) {
25134 GemmMicrokernelTester()
25135 .mr(2)
25136 .nr(4)
25137 .kr(8)
25138 .sr(1)
25139 .m(m)
25140 .n(4)
25141 .k(8)
25142 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025143 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025144 }
25145 }
25146
25147 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
25148 for (uint32_t n = 1; n <= 4; n++) {
25149 GemmMicrokernelTester()
25150 .mr(2)
25151 .nr(4)
25152 .kr(8)
25153 .sr(1)
25154 .m(2)
25155 .n(n)
25156 .k(8)
25157 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025158 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025159 }
25160 }
25161
25162 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
25163 for (size_t k = 1; k < 8; k++) {
25164 GemmMicrokernelTester()
25165 .mr(2)
25166 .nr(4)
25167 .kr(8)
25168 .sr(1)
25169 .m(2)
25170 .n(4)
25171 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025172 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025173 }
25174 }
25175
25176 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
25177 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025178 for (uint32_t n = 1; n <= 4; n++) {
25179 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025180 GemmMicrokernelTester()
25181 .mr(2)
25182 .nr(4)
25183 .kr(8)
25184 .sr(1)
25185 .m(m)
25186 .n(n)
25187 .k(k)
25188 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025189 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025190 }
25191 }
25192 }
25193 }
25194
25195 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
25196 for (size_t k = 9; k < 16; k++) {
25197 GemmMicrokernelTester()
25198 .mr(2)
25199 .nr(4)
25200 .kr(8)
25201 .sr(1)
25202 .m(2)
25203 .n(4)
25204 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025205 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025206 }
25207 }
25208
25209 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
25210 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025211 for (uint32_t n = 1; n <= 4; n++) {
25212 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025213 GemmMicrokernelTester()
25214 .mr(2)
25215 .nr(4)
25216 .kr(8)
25217 .sr(1)
25218 .m(m)
25219 .n(n)
25220 .k(k)
25221 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025222 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025223 }
25224 }
25225 }
25226 }
25227
25228 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
25229 for (size_t k = 16; k <= 80; k += 8) {
25230 GemmMicrokernelTester()
25231 .mr(2)
25232 .nr(4)
25233 .kr(8)
25234 .sr(1)
25235 .m(2)
25236 .n(4)
25237 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025238 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025239 }
25240 }
25241
25242 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
25243 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025244 for (uint32_t n = 1; n <= 4; n++) {
25245 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025246 GemmMicrokernelTester()
25247 .mr(2)
25248 .nr(4)
25249 .kr(8)
25250 .sr(1)
25251 .m(m)
25252 .n(n)
25253 .k(k)
25254 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025255 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025256 }
25257 }
25258 }
25259 }
25260
25261 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
25262 for (uint32_t n = 5; n < 8; n++) {
25263 for (size_t k = 1; k <= 40; k += 9) {
25264 GemmMicrokernelTester()
25265 .mr(2)
25266 .nr(4)
25267 .kr(8)
25268 .sr(1)
25269 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025270 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025271 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025272 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025273 }
25274 }
25275 }
25276
25277 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
25278 for (uint32_t n = 5; n < 8; n++) {
25279 for (size_t k = 1; k <= 40; k += 9) {
25280 GemmMicrokernelTester()
25281 .mr(2)
25282 .nr(4)
25283 .kr(8)
25284 .sr(1)
25285 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025286 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025287 .k(k)
25288 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025289 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025290 }
25291 }
25292 }
25293
25294 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
25295 for (uint32_t n = 5; n < 8; n++) {
25296 for (size_t k = 1; k <= 40; k += 9) {
25297 for (uint32_t m = 1; m <= 2; m++) {
25298 GemmMicrokernelTester()
25299 .mr(2)
25300 .nr(4)
25301 .kr(8)
25302 .sr(1)
25303 .m(m)
25304 .n(n)
25305 .k(k)
25306 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025307 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025308 }
25309 }
25310 }
25311 }
25312
25313 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
25314 for (uint32_t n = 8; n <= 12; n += 4) {
25315 for (size_t k = 1; k <= 40; k += 9) {
25316 GemmMicrokernelTester()
25317 .mr(2)
25318 .nr(4)
25319 .kr(8)
25320 .sr(1)
25321 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025322 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025323 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025324 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025325 }
25326 }
25327 }
25328
25329 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
25330 for (uint32_t n = 8; n <= 12; n += 4) {
25331 for (size_t k = 1; k <= 40; k += 9) {
25332 GemmMicrokernelTester()
25333 .mr(2)
25334 .nr(4)
25335 .kr(8)
25336 .sr(1)
25337 .m(2)
25338 .n(n)
25339 .k(k)
25340 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025341 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025342 }
25343 }
25344 }
25345
25346 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
25347 for (uint32_t n = 8; n <= 12; n += 4) {
25348 for (size_t k = 1; k <= 40; k += 9) {
25349 for (uint32_t m = 1; m <= 2; m++) {
25350 GemmMicrokernelTester()
25351 .mr(2)
25352 .nr(4)
25353 .kr(8)
25354 .sr(1)
25355 .m(m)
25356 .n(n)
25357 .k(k)
25358 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025359 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025360 }
25361 }
25362 }
25363 }
25364
25365 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, small_kernel) {
25366 for (size_t k = 1; k <= 40; k += 9) {
25367 GemmMicrokernelTester()
25368 .mr(2)
25369 .nr(4)
25370 .kr(8)
25371 .sr(1)
25372 .m(2)
25373 .n(4)
25374 .k(k)
25375 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080025376 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025377 }
25378 }
25379
25380 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
25381 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025382 for (uint32_t n = 1; n <= 4; n++) {
25383 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025384 GemmMicrokernelTester()
25385 .mr(2)
25386 .nr(4)
25387 .kr(8)
25388 .sr(1)
25389 .m(m)
25390 .n(n)
25391 .k(k)
25392 .ks(3)
25393 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025394 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025395 }
25396 }
25397 }
25398 }
25399
25400 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
25401 for (uint32_t n = 5; n < 8; n++) {
25402 for (size_t k = 1; k <= 40; k += 9) {
25403 GemmMicrokernelTester()
25404 .mr(2)
25405 .nr(4)
25406 .kr(8)
25407 .sr(1)
25408 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025409 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025410 .k(k)
25411 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080025412 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025413 }
25414 }
25415 }
25416
25417 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
25418 for (uint32_t n = 8; n <= 12; n += 4) {
25419 for (size_t k = 1; k <= 40; k += 9) {
25420 GemmMicrokernelTester()
25421 .mr(2)
25422 .nr(4)
25423 .kr(8)
25424 .sr(1)
25425 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025426 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025427 .k(k)
25428 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080025429 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025430 }
25431 }
25432 }
25433
25434 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
25435 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025436 for (uint32_t n = 1; n <= 4; n++) {
25437 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025438 GemmMicrokernelTester()
25439 .mr(2)
25440 .nr(4)
25441 .kr(8)
25442 .sr(1)
25443 .m(m)
25444 .n(n)
25445 .k(k)
25446 .cm_stride(7)
25447 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025448 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025449 }
25450 }
25451 }
25452 }
25453
25454 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, a_offset) {
25455 for (size_t k = 1; k <= 40; k += 9) {
25456 GemmMicrokernelTester()
25457 .mr(2)
25458 .nr(4)
25459 .kr(8)
25460 .sr(1)
25461 .m(2)
25462 .n(4)
25463 .k(k)
25464 .ks(3)
25465 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080025466 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025467 }
25468 }
25469
25470 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025471 for (size_t k = 1; k <= 40; k += 9) {
25472 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025473 GemmMicrokernelTester()
25474 .mr(2)
25475 .nr(4)
25476 .kr(8)
25477 .sr(1)
25478 .m(2)
25479 .n(4)
25480 .k(k)
25481 .ks(3)
25482 .a_offset(83)
25483 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080025484 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025485 }
25486 }
25487 }
25488
25489 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
25490 GemmMicrokernelTester()
25491 .mr(2)
25492 .nr(4)
25493 .kr(8)
25494 .sr(1)
25495 .m(2)
25496 .n(4)
25497 .k(8)
25498 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025499 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025500 }
25501
25502 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
25503 GemmMicrokernelTester()
25504 .mr(2)
25505 .nr(4)
25506 .kr(8)
25507 .sr(1)
25508 .m(2)
25509 .n(4)
25510 .k(8)
25511 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025512 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025513 }
25514
25515 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
25516 GemmMicrokernelTester()
25517 .mr(2)
25518 .nr(4)
25519 .kr(8)
25520 .sr(1)
25521 .m(2)
25522 .n(4)
25523 .k(8)
25524 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025525 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025526 }
25527
25528 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
25529 for (size_t k = 1; k <= 40; k += 9) {
25530 GemmMicrokernelTester()
25531 .mr(2)
25532 .nr(4)
25533 .kr(8)
25534 .sr(1)
25535 .m(2)
25536 .n(4)
25537 .k(k)
25538 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080025539 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025540 }
25541 }
25542
25543 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
25544 for (size_t k = 1; k <= 40; k += 9) {
25545 GemmMicrokernelTester()
25546 .mr(2)
25547 .nr(4)
25548 .kr(8)
25549 .sr(1)
25550 .m(2)
25551 .n(4)
25552 .k(k)
25553 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080025554 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025555 }
25556 }
25557
25558 TEST(QU8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, no_zero_point) {
25559 for (size_t k = 1; k <= 40; k += 9) {
25560 GemmMicrokernelTester()
25561 .mr(2)
25562 .nr(4)
25563 .kr(8)
25564 .sr(1)
25565 .m(2)
25566 .n(4)
25567 .k(k)
25568 .a_zero_point(0)
25569 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080025570 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025571 }
25572 }
Marat Dukhan4c617792021-12-21 15:47:58 -080025573#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025574
25575
Marat Dukhan4c617792021-12-21 15:47:58 -080025576#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025577 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
25578 GemmMicrokernelTester()
25579 .mr(4)
25580 .nr(4)
25581 .kr(8)
25582 .sr(1)
25583 .m(4)
25584 .n(4)
25585 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080025586 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025587 }
25588
25589 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
25590 GemmMicrokernelTester()
25591 .mr(4)
25592 .nr(4)
25593 .kr(8)
25594 .sr(1)
25595 .m(4)
25596 .n(4)
25597 .k(8)
25598 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025599 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025600 }
25601
25602 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025603 for (uint32_t n = 1; n <= 4; n++) {
25604 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025605 GemmMicrokernelTester()
25606 .mr(4)
25607 .nr(4)
25608 .kr(8)
25609 .sr(1)
25610 .m(m)
25611 .n(n)
25612 .k(8)
25613 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025614 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025615 }
25616 }
25617 }
25618
25619 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
25620 for (uint32_t m = 1; m <= 4; m++) {
25621 GemmMicrokernelTester()
25622 .mr(4)
25623 .nr(4)
25624 .kr(8)
25625 .sr(1)
25626 .m(m)
25627 .n(4)
25628 .k(8)
25629 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025630 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025631 }
25632 }
25633
25634 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
25635 for (uint32_t n = 1; n <= 4; n++) {
25636 GemmMicrokernelTester()
25637 .mr(4)
25638 .nr(4)
25639 .kr(8)
25640 .sr(1)
25641 .m(4)
25642 .n(n)
25643 .k(8)
25644 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025645 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025646 }
25647 }
25648
25649 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
25650 for (size_t k = 1; k < 8; k++) {
25651 GemmMicrokernelTester()
25652 .mr(4)
25653 .nr(4)
25654 .kr(8)
25655 .sr(1)
25656 .m(4)
25657 .n(4)
25658 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025659 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025660 }
25661 }
25662
25663 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
25664 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025665 for (uint32_t n = 1; n <= 4; n++) {
25666 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025667 GemmMicrokernelTester()
25668 .mr(4)
25669 .nr(4)
25670 .kr(8)
25671 .sr(1)
25672 .m(m)
25673 .n(n)
25674 .k(k)
25675 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025676 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025677 }
25678 }
25679 }
25680 }
25681
25682 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
25683 for (size_t k = 9; k < 16; k++) {
25684 GemmMicrokernelTester()
25685 .mr(4)
25686 .nr(4)
25687 .kr(8)
25688 .sr(1)
25689 .m(4)
25690 .n(4)
25691 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025692 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025693 }
25694 }
25695
25696 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
25697 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025698 for (uint32_t n = 1; n <= 4; n++) {
25699 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025700 GemmMicrokernelTester()
25701 .mr(4)
25702 .nr(4)
25703 .kr(8)
25704 .sr(1)
25705 .m(m)
25706 .n(n)
25707 .k(k)
25708 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025709 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025710 }
25711 }
25712 }
25713 }
25714
25715 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
25716 for (size_t k = 16; k <= 80; k += 8) {
25717 GemmMicrokernelTester()
25718 .mr(4)
25719 .nr(4)
25720 .kr(8)
25721 .sr(1)
25722 .m(4)
25723 .n(4)
25724 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025725 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025726 }
25727 }
25728
25729 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
25730 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025731 for (uint32_t n = 1; n <= 4; n++) {
25732 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025733 GemmMicrokernelTester()
25734 .mr(4)
25735 .nr(4)
25736 .kr(8)
25737 .sr(1)
25738 .m(m)
25739 .n(n)
25740 .k(k)
25741 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025742 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025743 }
25744 }
25745 }
25746 }
25747
25748 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
25749 for (uint32_t n = 5; n < 8; n++) {
25750 for (size_t k = 1; k <= 40; k += 9) {
25751 GemmMicrokernelTester()
25752 .mr(4)
25753 .nr(4)
25754 .kr(8)
25755 .sr(1)
25756 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025757 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025758 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025759 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025760 }
25761 }
25762 }
25763
25764 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
25765 for (uint32_t n = 5; n < 8; n++) {
25766 for (size_t k = 1; k <= 40; k += 9) {
25767 GemmMicrokernelTester()
25768 .mr(4)
25769 .nr(4)
25770 .kr(8)
25771 .sr(1)
25772 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025773 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025774 .k(k)
25775 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025776 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025777 }
25778 }
25779 }
25780
25781 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
25782 for (uint32_t n = 5; n < 8; n++) {
25783 for (size_t k = 1; k <= 40; k += 9) {
25784 for (uint32_t m = 1; m <= 4; m++) {
25785 GemmMicrokernelTester()
25786 .mr(4)
25787 .nr(4)
25788 .kr(8)
25789 .sr(1)
25790 .m(m)
25791 .n(n)
25792 .k(k)
25793 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025794 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025795 }
25796 }
25797 }
25798 }
25799
25800 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
25801 for (uint32_t n = 8; n <= 12; n += 4) {
25802 for (size_t k = 1; k <= 40; k += 9) {
25803 GemmMicrokernelTester()
25804 .mr(4)
25805 .nr(4)
25806 .kr(8)
25807 .sr(1)
25808 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025809 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025810 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080025811 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025812 }
25813 }
25814 }
25815
25816 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
25817 for (uint32_t n = 8; n <= 12; n += 4) {
25818 for (size_t k = 1; k <= 40; k += 9) {
25819 GemmMicrokernelTester()
25820 .mr(4)
25821 .nr(4)
25822 .kr(8)
25823 .sr(1)
25824 .m(4)
25825 .n(n)
25826 .k(k)
25827 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080025828 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025829 }
25830 }
25831 }
25832
25833 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
25834 for (uint32_t n = 8; n <= 12; n += 4) {
25835 for (size_t k = 1; k <= 40; k += 9) {
25836 for (uint32_t m = 1; m <= 4; m++) {
25837 GemmMicrokernelTester()
25838 .mr(4)
25839 .nr(4)
25840 .kr(8)
25841 .sr(1)
25842 .m(m)
25843 .n(n)
25844 .k(k)
25845 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025846 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025847 }
25848 }
25849 }
25850 }
25851
25852 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, small_kernel) {
25853 for (size_t k = 1; k <= 40; k += 9) {
25854 GemmMicrokernelTester()
25855 .mr(4)
25856 .nr(4)
25857 .kr(8)
25858 .sr(1)
25859 .m(4)
25860 .n(4)
25861 .k(k)
25862 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080025863 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025864 }
25865 }
25866
25867 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
25868 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025869 for (uint32_t n = 1; n <= 4; n++) {
25870 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025871 GemmMicrokernelTester()
25872 .mr(4)
25873 .nr(4)
25874 .kr(8)
25875 .sr(1)
25876 .m(m)
25877 .n(n)
25878 .k(k)
25879 .ks(3)
25880 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025881 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025882 }
25883 }
25884 }
25885 }
25886
25887 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
25888 for (uint32_t n = 5; n < 8; n++) {
25889 for (size_t k = 1; k <= 40; k += 9) {
25890 GemmMicrokernelTester()
25891 .mr(4)
25892 .nr(4)
25893 .kr(8)
25894 .sr(1)
25895 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025896 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025897 .k(k)
25898 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080025899 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025900 }
25901 }
25902 }
25903
25904 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
25905 for (uint32_t n = 8; n <= 12; n += 4) {
25906 for (size_t k = 1; k <= 40; k += 9) {
25907 GemmMicrokernelTester()
25908 .mr(4)
25909 .nr(4)
25910 .kr(8)
25911 .sr(1)
25912 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025913 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025914 .k(k)
25915 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080025916 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025917 }
25918 }
25919 }
25920
25921 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
25922 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025923 for (uint32_t n = 1; n <= 4; n++) {
25924 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025925 GemmMicrokernelTester()
25926 .mr(4)
25927 .nr(4)
25928 .kr(8)
25929 .sr(1)
25930 .m(m)
25931 .n(n)
25932 .k(k)
25933 .cm_stride(7)
25934 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080025935 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025936 }
25937 }
25938 }
25939 }
25940
25941 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, a_offset) {
25942 for (size_t k = 1; k <= 40; k += 9) {
25943 GemmMicrokernelTester()
25944 .mr(4)
25945 .nr(4)
25946 .kr(8)
25947 .sr(1)
25948 .m(4)
25949 .n(4)
25950 .k(k)
25951 .ks(3)
25952 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080025953 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025954 }
25955 }
25956
25957 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025958 for (size_t k = 1; k <= 40; k += 9) {
25959 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025960 GemmMicrokernelTester()
25961 .mr(4)
25962 .nr(4)
25963 .kr(8)
25964 .sr(1)
25965 .m(4)
25966 .n(4)
25967 .k(k)
25968 .ks(3)
25969 .a_offset(163)
25970 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080025971 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025972 }
25973 }
25974 }
25975
25976 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
25977 GemmMicrokernelTester()
25978 .mr(4)
25979 .nr(4)
25980 .kr(8)
25981 .sr(1)
25982 .m(4)
25983 .n(4)
25984 .k(8)
25985 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025986 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070025987 }
25988
25989 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
25990 GemmMicrokernelTester()
25991 .mr(4)
25992 .nr(4)
25993 .kr(8)
25994 .sr(1)
25995 .m(4)
25996 .n(4)
25997 .k(8)
25998 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080025999 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026000 }
26001
26002 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
26003 GemmMicrokernelTester()
26004 .mr(4)
26005 .nr(4)
26006 .kr(8)
26007 .sr(1)
26008 .m(4)
26009 .n(4)
26010 .k(8)
26011 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026012 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026013 }
26014
26015 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, no_a_zero_point) {
26016 for (size_t k = 1; k <= 40; k += 9) {
26017 GemmMicrokernelTester()
26018 .mr(4)
26019 .nr(4)
26020 .kr(8)
26021 .sr(1)
26022 .m(4)
26023 .n(4)
26024 .k(k)
26025 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080026026 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026027 }
26028 }
26029
26030 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, no_b_zero_point) {
26031 for (size_t k = 1; k <= 40; k += 9) {
26032 GemmMicrokernelTester()
26033 .mr(4)
26034 .nr(4)
26035 .kr(8)
26036 .sr(1)
26037 .m(4)
26038 .n(4)
26039 .k(k)
26040 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080026041 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026042 }
26043 }
26044
26045 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, no_zero_point) {
26046 for (size_t k = 1; k <= 40; k += 9) {
26047 GemmMicrokernelTester()
26048 .mr(4)
26049 .nr(4)
26050 .kr(8)
26051 .sr(1)
26052 .m(4)
26053 .n(4)
26054 .k(k)
26055 .a_zero_point(0)
26056 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080026057 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026058 }
26059 }
Marat Dukhan4c617792021-12-21 15:47:58 -080026060#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026061
26062
Marat Dukhan4c617792021-12-21 15:47:58 -080026063#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026064 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
26065 GemmMicrokernelTester()
26066 .mr(4)
26067 .nr(4)
26068 .kr(8)
26069 .sr(1)
26070 .m(4)
26071 .n(4)
26072 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080026073 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026074 }
26075
26076 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
26077 GemmMicrokernelTester()
26078 .mr(4)
26079 .nr(4)
26080 .kr(8)
26081 .sr(1)
26082 .m(4)
26083 .n(4)
26084 .k(8)
26085 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026086 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026087 }
26088
26089 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026090 for (uint32_t n = 1; n <= 4; n++) {
26091 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026092 GemmMicrokernelTester()
26093 .mr(4)
26094 .nr(4)
26095 .kr(8)
26096 .sr(1)
26097 .m(m)
26098 .n(n)
26099 .k(8)
26100 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026101 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026102 }
26103 }
26104 }
26105
26106 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
26107 for (uint32_t m = 1; m <= 4; m++) {
26108 GemmMicrokernelTester()
26109 .mr(4)
26110 .nr(4)
26111 .kr(8)
26112 .sr(1)
26113 .m(m)
26114 .n(4)
26115 .k(8)
26116 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026117 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026118 }
26119 }
26120
26121 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
26122 for (uint32_t n = 1; n <= 4; n++) {
26123 GemmMicrokernelTester()
26124 .mr(4)
26125 .nr(4)
26126 .kr(8)
26127 .sr(1)
26128 .m(4)
26129 .n(n)
26130 .k(8)
26131 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026132 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026133 }
26134 }
26135
26136 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
26137 for (size_t k = 1; k < 8; k++) {
26138 GemmMicrokernelTester()
26139 .mr(4)
26140 .nr(4)
26141 .kr(8)
26142 .sr(1)
26143 .m(4)
26144 .n(4)
26145 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026146 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026147 }
26148 }
26149
26150 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
26151 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026152 for (uint32_t n = 1; n <= 4; n++) {
26153 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026154 GemmMicrokernelTester()
26155 .mr(4)
26156 .nr(4)
26157 .kr(8)
26158 .sr(1)
26159 .m(m)
26160 .n(n)
26161 .k(k)
26162 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026163 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026164 }
26165 }
26166 }
26167 }
26168
26169 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
26170 for (size_t k = 9; k < 16; k++) {
26171 GemmMicrokernelTester()
26172 .mr(4)
26173 .nr(4)
26174 .kr(8)
26175 .sr(1)
26176 .m(4)
26177 .n(4)
26178 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026179 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026180 }
26181 }
26182
26183 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
26184 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026185 for (uint32_t n = 1; n <= 4; n++) {
26186 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026187 GemmMicrokernelTester()
26188 .mr(4)
26189 .nr(4)
26190 .kr(8)
26191 .sr(1)
26192 .m(m)
26193 .n(n)
26194 .k(k)
26195 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026196 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026197 }
26198 }
26199 }
26200 }
26201
26202 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
26203 for (size_t k = 16; k <= 80; k += 8) {
26204 GemmMicrokernelTester()
26205 .mr(4)
26206 .nr(4)
26207 .kr(8)
26208 .sr(1)
26209 .m(4)
26210 .n(4)
26211 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026212 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026213 }
26214 }
26215
26216 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
26217 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026218 for (uint32_t n = 1; n <= 4; n++) {
26219 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026220 GemmMicrokernelTester()
26221 .mr(4)
26222 .nr(4)
26223 .kr(8)
26224 .sr(1)
26225 .m(m)
26226 .n(n)
26227 .k(k)
26228 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026229 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026230 }
26231 }
26232 }
26233 }
26234
26235 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
26236 for (uint32_t n = 5; n < 8; n++) {
26237 for (size_t k = 1; k <= 40; k += 9) {
26238 GemmMicrokernelTester()
26239 .mr(4)
26240 .nr(4)
26241 .kr(8)
26242 .sr(1)
26243 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026244 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026245 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026246 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026247 }
26248 }
26249 }
26250
26251 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
26252 for (uint32_t n = 5; n < 8; n++) {
26253 for (size_t k = 1; k <= 40; k += 9) {
26254 GemmMicrokernelTester()
26255 .mr(4)
26256 .nr(4)
26257 .kr(8)
26258 .sr(1)
26259 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026260 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026261 .k(k)
26262 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026263 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026264 }
26265 }
26266 }
26267
26268 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
26269 for (uint32_t n = 5; n < 8; n++) {
26270 for (size_t k = 1; k <= 40; k += 9) {
26271 for (uint32_t m = 1; m <= 4; m++) {
26272 GemmMicrokernelTester()
26273 .mr(4)
26274 .nr(4)
26275 .kr(8)
26276 .sr(1)
26277 .m(m)
26278 .n(n)
26279 .k(k)
26280 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026281 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026282 }
26283 }
26284 }
26285 }
26286
26287 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
26288 for (uint32_t n = 8; n <= 12; n += 4) {
26289 for (size_t k = 1; k <= 40; k += 9) {
26290 GemmMicrokernelTester()
26291 .mr(4)
26292 .nr(4)
26293 .kr(8)
26294 .sr(1)
26295 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026296 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026297 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026298 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026299 }
26300 }
26301 }
26302
26303 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
26304 for (uint32_t n = 8; n <= 12; n += 4) {
26305 for (size_t k = 1; k <= 40; k += 9) {
26306 GemmMicrokernelTester()
26307 .mr(4)
26308 .nr(4)
26309 .kr(8)
26310 .sr(1)
26311 .m(4)
26312 .n(n)
26313 .k(k)
26314 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026315 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026316 }
26317 }
26318 }
26319
26320 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
26321 for (uint32_t n = 8; n <= 12; n += 4) {
26322 for (size_t k = 1; k <= 40; k += 9) {
26323 for (uint32_t m = 1; m <= 4; m++) {
26324 GemmMicrokernelTester()
26325 .mr(4)
26326 .nr(4)
26327 .kr(8)
26328 .sr(1)
26329 .m(m)
26330 .n(n)
26331 .k(k)
26332 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026333 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026334 }
26335 }
26336 }
26337 }
26338
26339 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, small_kernel) {
26340 for (size_t k = 1; k <= 40; k += 9) {
26341 GemmMicrokernelTester()
26342 .mr(4)
26343 .nr(4)
26344 .kr(8)
26345 .sr(1)
26346 .m(4)
26347 .n(4)
26348 .k(k)
26349 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080026350 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026351 }
26352 }
26353
26354 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
26355 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026356 for (uint32_t n = 1; n <= 4; n++) {
26357 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026358 GemmMicrokernelTester()
26359 .mr(4)
26360 .nr(4)
26361 .kr(8)
26362 .sr(1)
26363 .m(m)
26364 .n(n)
26365 .k(k)
26366 .ks(3)
26367 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026368 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026369 }
26370 }
26371 }
26372 }
26373
26374 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
26375 for (uint32_t n = 5; n < 8; n++) {
26376 for (size_t k = 1; k <= 40; k += 9) {
26377 GemmMicrokernelTester()
26378 .mr(4)
26379 .nr(4)
26380 .kr(8)
26381 .sr(1)
26382 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026383 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026384 .k(k)
26385 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080026386 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026387 }
26388 }
26389 }
26390
26391 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
26392 for (uint32_t n = 8; n <= 12; n += 4) {
26393 for (size_t k = 1; k <= 40; k += 9) {
26394 GemmMicrokernelTester()
26395 .mr(4)
26396 .nr(4)
26397 .kr(8)
26398 .sr(1)
26399 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026400 .n(n)
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026401 .k(k)
26402 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080026403 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026404 }
26405 }
26406 }
26407
26408 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
26409 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026410 for (uint32_t n = 1; n <= 4; n++) {
26411 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026412 GemmMicrokernelTester()
26413 .mr(4)
26414 .nr(4)
26415 .kr(8)
26416 .sr(1)
26417 .m(m)
26418 .n(n)
26419 .k(k)
26420 .cm_stride(7)
26421 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026422 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026423 }
26424 }
26425 }
26426 }
26427
26428 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, a_offset) {
26429 for (size_t k = 1; k <= 40; k += 9) {
26430 GemmMicrokernelTester()
26431 .mr(4)
26432 .nr(4)
26433 .kr(8)
26434 .sr(1)
26435 .m(4)
26436 .n(4)
26437 .k(k)
26438 .ks(3)
26439 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080026440 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026441 }
26442 }
26443
26444 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026445 for (size_t k = 1; k <= 40; k += 9) {
26446 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026447 GemmMicrokernelTester()
26448 .mr(4)
26449 .nr(4)
26450 .kr(8)
26451 .sr(1)
26452 .m(4)
26453 .n(4)
26454 .k(k)
26455 .ks(3)
26456 .a_offset(163)
26457 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080026458 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026459 }
26460 }
26461 }
26462
26463 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
26464 GemmMicrokernelTester()
26465 .mr(4)
26466 .nr(4)
26467 .kr(8)
26468 .sr(1)
26469 .m(4)
26470 .n(4)
26471 .k(8)
26472 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026473 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026474 }
26475
26476 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
26477 GemmMicrokernelTester()
26478 .mr(4)
26479 .nr(4)
26480 .kr(8)
26481 .sr(1)
26482 .m(4)
26483 .n(4)
26484 .k(8)
26485 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026486 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026487 }
26488
26489 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
26490 GemmMicrokernelTester()
26491 .mr(4)
26492 .nr(4)
26493 .kr(8)
26494 .sr(1)
26495 .m(4)
26496 .n(4)
26497 .k(8)
26498 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026499 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026500 }
26501
26502 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, no_a_zero_point) {
26503 for (size_t k = 1; k <= 40; k += 9) {
26504 GemmMicrokernelTester()
26505 .mr(4)
26506 .nr(4)
26507 .kr(8)
26508 .sr(1)
26509 .m(4)
26510 .n(4)
26511 .k(k)
26512 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080026513 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026514 }
26515 }
26516
26517 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, no_b_zero_point) {
26518 for (size_t k = 1; k <= 40; k += 9) {
26519 GemmMicrokernelTester()
26520 .mr(4)
26521 .nr(4)
26522 .kr(8)
26523 .sr(1)
26524 .m(4)
26525 .n(4)
26526 .k(k)
26527 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080026528 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026529 }
26530 }
26531
26532 TEST(QU8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, no_zero_point) {
26533 for (size_t k = 1; k <= 40; k += 9) {
26534 GemmMicrokernelTester()
26535 .mr(4)
26536 .nr(4)
26537 .kr(8)
26538 .sr(1)
26539 .m(4)
26540 .n(4)
26541 .k(k)
26542 .a_zero_point(0)
26543 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080026544 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026545 }
26546 }
Marat Dukhan4c617792021-12-21 15:47:58 -080026547#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -070026548
26549
Marat Dukhan4c617792021-12-21 15:47:58 -080026550#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhandfc2db02021-08-08 21:19:07 -070026551 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, k_eq_8) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026552 GemmMicrokernelTester()
26553 .mr(3)
26554 .nr(4)
26555 .kr(8)
26556 .sr(1)
26557 .m(3)
26558 .n(4)
26559 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080026560 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070026561 }
26562
Marat Dukhandfc2db02021-08-08 21:19:07 -070026563 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, strided_cn) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026564 GemmMicrokernelTester()
26565 .mr(3)
26566 .nr(4)
26567 .kr(8)
26568 .sr(1)
26569 .m(3)
26570 .n(4)
26571 .k(8)
26572 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026573 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070026574 }
26575
Marat Dukhandfc2db02021-08-08 21:19:07 -070026576 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026577 for (uint32_t n = 1; n <= 4; n++) {
26578 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026579 GemmMicrokernelTester()
26580 .mr(3)
26581 .nr(4)
26582 .kr(8)
26583 .sr(1)
26584 .m(m)
26585 .n(n)
26586 .k(8)
26587 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026588 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070026589 }
26590 }
26591 }
26592
Marat Dukhandfc2db02021-08-08 21:19:07 -070026593 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, k_eq_8_subtile_m) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026594 for (uint32_t m = 1; m <= 3; m++) {
26595 GemmMicrokernelTester()
26596 .mr(3)
26597 .nr(4)
26598 .kr(8)
26599 .sr(1)
26600 .m(m)
26601 .n(4)
26602 .k(8)
26603 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026604 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070026605 }
26606 }
26607
Marat Dukhandfc2db02021-08-08 21:19:07 -070026608 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, k_eq_8_subtile_n) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026609 for (uint32_t n = 1; n <= 4; n++) {
26610 GemmMicrokernelTester()
26611 .mr(3)
26612 .nr(4)
26613 .kr(8)
26614 .sr(1)
26615 .m(3)
26616 .n(n)
26617 .k(8)
26618 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026619 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070026620 }
26621 }
26622
Marat Dukhandfc2db02021-08-08 21:19:07 -070026623 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, k_lt_8) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026624 for (size_t k = 1; k < 8; k++) {
26625 GemmMicrokernelTester()
26626 .mr(3)
26627 .nr(4)
26628 .kr(8)
26629 .sr(1)
26630 .m(3)
26631 .n(4)
26632 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026633 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070026634 }
26635 }
26636
Marat Dukhandfc2db02021-08-08 21:19:07 -070026637 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, k_lt_8_subtile) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026638 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026639 for (uint32_t n = 1; n <= 4; n++) {
26640 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026641 GemmMicrokernelTester()
26642 .mr(3)
26643 .nr(4)
26644 .kr(8)
26645 .sr(1)
26646 .m(m)
26647 .n(n)
26648 .k(k)
26649 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026650 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070026651 }
26652 }
26653 }
26654 }
26655
Marat Dukhandfc2db02021-08-08 21:19:07 -070026656 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, k_gt_8) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026657 for (size_t k = 9; k < 16; k++) {
26658 GemmMicrokernelTester()
26659 .mr(3)
26660 .nr(4)
26661 .kr(8)
26662 .sr(1)
26663 .m(3)
26664 .n(4)
26665 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026666 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070026667 }
26668 }
26669
Marat Dukhandfc2db02021-08-08 21:19:07 -070026670 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, k_gt_8_subtile) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026671 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026672 for (uint32_t n = 1; n <= 4; n++) {
26673 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026674 GemmMicrokernelTester()
26675 .mr(3)
26676 .nr(4)
26677 .kr(8)
26678 .sr(1)
26679 .m(m)
26680 .n(n)
26681 .k(k)
26682 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026683 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070026684 }
26685 }
26686 }
26687 }
26688
Marat Dukhandfc2db02021-08-08 21:19:07 -070026689 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, k_div_8) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026690 for (size_t k = 16; k <= 80; k += 8) {
26691 GemmMicrokernelTester()
26692 .mr(3)
26693 .nr(4)
26694 .kr(8)
26695 .sr(1)
26696 .m(3)
26697 .n(4)
26698 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026699 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070026700 }
26701 }
26702
Marat Dukhandfc2db02021-08-08 21:19:07 -070026703 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, k_div_8_subtile) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026704 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026705 for (uint32_t n = 1; n <= 4; n++) {
26706 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026707 GemmMicrokernelTester()
26708 .mr(3)
26709 .nr(4)
26710 .kr(8)
26711 .sr(1)
26712 .m(m)
26713 .n(n)
26714 .k(k)
26715 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026716 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070026717 }
26718 }
26719 }
26720 }
26721
Marat Dukhandfc2db02021-08-08 21:19:07 -070026722 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, n_gt_4) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026723 for (uint32_t n = 5; n < 8; n++) {
26724 for (size_t k = 1; k <= 40; k += 9) {
26725 GemmMicrokernelTester()
26726 .mr(3)
26727 .nr(4)
26728 .kr(8)
26729 .sr(1)
26730 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026731 .n(n)
Marat Dukhan43bee052021-07-14 20:57:18 -070026732 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026733 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070026734 }
26735 }
26736 }
26737
Marat Dukhandfc2db02021-08-08 21:19:07 -070026738 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, n_gt_4_strided_cn) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026739 for (uint32_t n = 5; n < 8; n++) {
26740 for (size_t k = 1; k <= 40; k += 9) {
26741 GemmMicrokernelTester()
26742 .mr(3)
26743 .nr(4)
26744 .kr(8)
26745 .sr(1)
26746 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026747 .n(n)
Marat Dukhan43bee052021-07-14 20:57:18 -070026748 .k(k)
26749 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026750 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070026751 }
26752 }
26753 }
26754
Marat Dukhandfc2db02021-08-08 21:19:07 -070026755 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, n_gt_4_subtile) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026756 for (uint32_t n = 5; n < 8; n++) {
26757 for (size_t k = 1; k <= 40; k += 9) {
26758 for (uint32_t m = 1; m <= 3; m++) {
26759 GemmMicrokernelTester()
26760 .mr(3)
26761 .nr(4)
26762 .kr(8)
26763 .sr(1)
26764 .m(m)
26765 .n(n)
26766 .k(k)
26767 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026768 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070026769 }
26770 }
26771 }
26772 }
26773
Marat Dukhandfc2db02021-08-08 21:19:07 -070026774 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, n_div_4) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026775 for (uint32_t n = 8; n <= 12; n += 4) {
26776 for (size_t k = 1; k <= 40; k += 9) {
26777 GemmMicrokernelTester()
26778 .mr(3)
26779 .nr(4)
26780 .kr(8)
26781 .sr(1)
26782 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026783 .n(n)
Marat Dukhan43bee052021-07-14 20:57:18 -070026784 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080026785 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070026786 }
26787 }
26788 }
26789
Marat Dukhandfc2db02021-08-08 21:19:07 -070026790 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, n_div_4_strided_cn) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026791 for (uint32_t n = 8; n <= 12; n += 4) {
26792 for (size_t k = 1; k <= 40; k += 9) {
26793 GemmMicrokernelTester()
26794 .mr(3)
26795 .nr(4)
26796 .kr(8)
26797 .sr(1)
26798 .m(3)
26799 .n(n)
26800 .k(k)
26801 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026802 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070026803 }
26804 }
26805 }
26806
Marat Dukhandfc2db02021-08-08 21:19:07 -070026807 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, n_div_4_subtile) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026808 for (uint32_t n = 8; n <= 12; n += 4) {
26809 for (size_t k = 1; k <= 40; k += 9) {
26810 for (uint32_t m = 1; m <= 3; m++) {
26811 GemmMicrokernelTester()
26812 .mr(3)
26813 .nr(4)
26814 .kr(8)
26815 .sr(1)
26816 .m(m)
26817 .n(n)
26818 .k(k)
26819 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026820 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070026821 }
26822 }
26823 }
26824 }
26825
Marat Dukhandfc2db02021-08-08 21:19:07 -070026826 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, small_kernel) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026827 for (size_t k = 1; k <= 40; k += 9) {
26828 GemmMicrokernelTester()
26829 .mr(3)
26830 .nr(4)
26831 .kr(8)
26832 .sr(1)
26833 .m(3)
26834 .n(4)
26835 .k(k)
26836 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080026837 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070026838 }
26839 }
26840
Marat Dukhandfc2db02021-08-08 21:19:07 -070026841 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, small_kernel_subtile) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026842 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026843 for (uint32_t n = 1; n <= 4; n++) {
26844 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026845 GemmMicrokernelTester()
26846 .mr(3)
26847 .nr(4)
26848 .kr(8)
26849 .sr(1)
26850 .m(m)
26851 .n(n)
26852 .k(k)
26853 .ks(3)
26854 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026855 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070026856 }
26857 }
26858 }
26859 }
26860
Marat Dukhandfc2db02021-08-08 21:19:07 -070026861 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, n_gt_4_small_kernel) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026862 for (uint32_t n = 5; n < 8; n++) {
26863 for (size_t k = 1; k <= 40; k += 9) {
26864 GemmMicrokernelTester()
26865 .mr(3)
26866 .nr(4)
26867 .kr(8)
26868 .sr(1)
26869 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026870 .n(n)
Marat Dukhan43bee052021-07-14 20:57:18 -070026871 .k(k)
26872 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080026873 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070026874 }
26875 }
26876 }
26877
Marat Dukhandfc2db02021-08-08 21:19:07 -070026878 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, n_div_4_small_kernel) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026879 for (uint32_t n = 8; n <= 12; n += 4) {
26880 for (size_t k = 1; k <= 40; k += 9) {
26881 GemmMicrokernelTester()
26882 .mr(3)
26883 .nr(4)
26884 .kr(8)
26885 .sr(1)
26886 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080026887 .n(n)
Marat Dukhan43bee052021-07-14 20:57:18 -070026888 .k(k)
26889 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080026890 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070026891 }
26892 }
26893 }
26894
Marat Dukhandfc2db02021-08-08 21:19:07 -070026895 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, strided_cm_subtile) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026896 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026897 for (uint32_t n = 1; n <= 4; n++) {
26898 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026899 GemmMicrokernelTester()
26900 .mr(3)
26901 .nr(4)
26902 .kr(8)
26903 .sr(1)
26904 .m(m)
26905 .n(n)
26906 .k(k)
26907 .cm_stride(7)
26908 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080026909 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070026910 }
26911 }
26912 }
26913 }
26914
Marat Dukhandfc2db02021-08-08 21:19:07 -070026915 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, a_offset) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026916 for (size_t k = 1; k <= 40; k += 9) {
26917 GemmMicrokernelTester()
26918 .mr(3)
26919 .nr(4)
26920 .kr(8)
26921 .sr(1)
26922 .m(3)
26923 .n(4)
26924 .k(k)
26925 .ks(3)
26926 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080026927 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070026928 }
26929 }
26930
Marat Dukhandfc2db02021-08-08 21:19:07 -070026931 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080026932 for (size_t k = 1; k <= 40; k += 9) {
26933 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026934 GemmMicrokernelTester()
26935 .mr(3)
26936 .nr(4)
26937 .kr(8)
26938 .sr(1)
26939 .m(3)
26940 .n(4)
26941 .k(k)
26942 .ks(3)
26943 .a_offset(127)
26944 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080026945 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070026946 }
26947 }
26948 }
26949
Marat Dukhandfc2db02021-08-08 21:19:07 -070026950 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, qmin) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026951 GemmMicrokernelTester()
26952 .mr(3)
26953 .nr(4)
26954 .kr(8)
26955 .sr(1)
26956 .m(3)
26957 .n(4)
26958 .k(8)
26959 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026960 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070026961 }
26962
Marat Dukhandfc2db02021-08-08 21:19:07 -070026963 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, qmax) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026964 GemmMicrokernelTester()
26965 .mr(3)
26966 .nr(4)
26967 .kr(8)
26968 .sr(1)
26969 .m(3)
26970 .n(4)
26971 .k(8)
26972 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080026973 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070026974 }
26975
Marat Dukhandfc2db02021-08-08 21:19:07 -070026976 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, strided_cm) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026977 GemmMicrokernelTester()
26978 .mr(3)
26979 .nr(4)
26980 .kr(8)
26981 .sr(1)
26982 .m(3)
26983 .n(4)
26984 .k(8)
26985 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080026986 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070026987 }
26988
Marat Dukhandfc2db02021-08-08 21:19:07 -070026989 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, no_a_zero_point) {
Marat Dukhan43bee052021-07-14 20:57:18 -070026990 for (size_t k = 1; k <= 40; k += 9) {
26991 GemmMicrokernelTester()
26992 .mr(3)
26993 .nr(4)
26994 .kr(8)
26995 .sr(1)
26996 .m(3)
26997 .n(4)
26998 .k(k)
26999 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080027000 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027001 }
27002 }
27003
Marat Dukhandfc2db02021-08-08 21:19:07 -070027004 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, no_b_zero_point) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027005 for (size_t k = 1; k <= 40; k += 9) {
27006 GemmMicrokernelTester()
27007 .mr(3)
27008 .nr(4)
27009 .kr(8)
27010 .sr(1)
27011 .m(3)
27012 .n(4)
27013 .k(k)
27014 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080027015 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027016 }
27017 }
27018
Marat Dukhandfc2db02021-08-08 21:19:07 -070027019 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD64, no_zero_point) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027020 for (size_t k = 1; k <= 40; k += 9) {
27021 GemmMicrokernelTester()
27022 .mr(3)
27023 .nr(4)
27024 .kr(8)
27025 .sr(1)
27026 .m(3)
27027 .n(4)
27028 .k(k)
27029 .a_zero_point(0)
27030 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080027031 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027032 }
27033 }
Marat Dukhan4c617792021-12-21 15:47:58 -080027034#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan43bee052021-07-14 20:57:18 -070027035
27036
Marat Dukhan4c617792021-12-21 15:47:58 -080027037#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhandfc2db02021-08-08 21:19:07 -070027038 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, k_eq_8) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027039 GemmMicrokernelTester()
27040 .mr(1)
27041 .nr(4)
27042 .kr(8)
27043 .sr(1)
27044 .m(1)
27045 .n(4)
27046 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080027047 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027048 }
27049
Marat Dukhandfc2db02021-08-08 21:19:07 -070027050 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, strided_cn) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027051 GemmMicrokernelTester()
27052 .mr(1)
27053 .nr(4)
27054 .kr(8)
27055 .sr(1)
27056 .m(1)
27057 .n(4)
27058 .k(8)
27059 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027060 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027061 }
27062
Marat Dukhandfc2db02021-08-08 21:19:07 -070027063 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027064 for (uint32_t n = 1; n <= 4; n++) {
27065 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027066 GemmMicrokernelTester()
27067 .mr(1)
27068 .nr(4)
27069 .kr(8)
27070 .sr(1)
27071 .m(m)
27072 .n(n)
27073 .k(8)
27074 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027075 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027076 }
27077 }
27078 }
27079
Marat Dukhandfc2db02021-08-08 21:19:07 -070027080 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, k_eq_8_subtile_m) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027081 for (uint32_t m = 1; m <= 1; m++) {
27082 GemmMicrokernelTester()
27083 .mr(1)
27084 .nr(4)
27085 .kr(8)
27086 .sr(1)
27087 .m(m)
27088 .n(4)
27089 .k(8)
27090 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027091 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027092 }
27093 }
27094
Marat Dukhandfc2db02021-08-08 21:19:07 -070027095 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, k_eq_8_subtile_n) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027096 for (uint32_t n = 1; n <= 4; n++) {
27097 GemmMicrokernelTester()
27098 .mr(1)
27099 .nr(4)
27100 .kr(8)
27101 .sr(1)
27102 .m(1)
27103 .n(n)
27104 .k(8)
27105 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027106 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027107 }
27108 }
27109
Marat Dukhandfc2db02021-08-08 21:19:07 -070027110 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, k_lt_8) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027111 for (size_t k = 1; k < 8; k++) {
27112 GemmMicrokernelTester()
27113 .mr(1)
27114 .nr(4)
27115 .kr(8)
27116 .sr(1)
27117 .m(1)
27118 .n(4)
27119 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027120 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027121 }
27122 }
27123
Marat Dukhandfc2db02021-08-08 21:19:07 -070027124 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, k_lt_8_subtile) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027125 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027126 for (uint32_t n = 1; n <= 4; n++) {
27127 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027128 GemmMicrokernelTester()
27129 .mr(1)
27130 .nr(4)
27131 .kr(8)
27132 .sr(1)
27133 .m(m)
27134 .n(n)
27135 .k(k)
27136 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027137 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027138 }
27139 }
27140 }
27141 }
27142
Marat Dukhandfc2db02021-08-08 21:19:07 -070027143 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, k_gt_8) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027144 for (size_t k = 9; k < 16; k++) {
27145 GemmMicrokernelTester()
27146 .mr(1)
27147 .nr(4)
27148 .kr(8)
27149 .sr(1)
27150 .m(1)
27151 .n(4)
27152 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027153 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027154 }
27155 }
27156
Marat Dukhandfc2db02021-08-08 21:19:07 -070027157 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, k_gt_8_subtile) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027158 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027159 for (uint32_t n = 1; n <= 4; n++) {
27160 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027161 GemmMicrokernelTester()
27162 .mr(1)
27163 .nr(4)
27164 .kr(8)
27165 .sr(1)
27166 .m(m)
27167 .n(n)
27168 .k(k)
27169 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027170 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027171 }
27172 }
27173 }
27174 }
27175
Marat Dukhandfc2db02021-08-08 21:19:07 -070027176 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, k_div_8) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027177 for (size_t k = 16; k <= 80; k += 8) {
27178 GemmMicrokernelTester()
27179 .mr(1)
27180 .nr(4)
27181 .kr(8)
27182 .sr(1)
27183 .m(1)
27184 .n(4)
27185 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027186 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027187 }
27188 }
27189
Marat Dukhandfc2db02021-08-08 21:19:07 -070027190 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, k_div_8_subtile) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027191 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027192 for (uint32_t n = 1; n <= 4; n++) {
27193 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027194 GemmMicrokernelTester()
27195 .mr(1)
27196 .nr(4)
27197 .kr(8)
27198 .sr(1)
27199 .m(m)
27200 .n(n)
27201 .k(k)
27202 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027203 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027204 }
27205 }
27206 }
27207 }
27208
Marat Dukhandfc2db02021-08-08 21:19:07 -070027209 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, n_gt_4) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027210 for (uint32_t n = 5; n < 8; n++) {
27211 for (size_t k = 1; k <= 40; k += 9) {
27212 GemmMicrokernelTester()
27213 .mr(1)
27214 .nr(4)
27215 .kr(8)
27216 .sr(1)
27217 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027218 .n(n)
Marat Dukhan43bee052021-07-14 20:57:18 -070027219 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027220 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027221 }
27222 }
27223 }
27224
Marat Dukhandfc2db02021-08-08 21:19:07 -070027225 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, n_gt_4_strided_cn) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027226 for (uint32_t n = 5; n < 8; n++) {
27227 for (size_t k = 1; k <= 40; k += 9) {
27228 GemmMicrokernelTester()
27229 .mr(1)
27230 .nr(4)
27231 .kr(8)
27232 .sr(1)
27233 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027234 .n(n)
Marat Dukhan43bee052021-07-14 20:57:18 -070027235 .k(k)
27236 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027237 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027238 }
27239 }
27240 }
27241
Marat Dukhandfc2db02021-08-08 21:19:07 -070027242 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, n_gt_4_subtile) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027243 for (uint32_t n = 5; n < 8; n++) {
27244 for (size_t k = 1; k <= 40; k += 9) {
27245 for (uint32_t m = 1; m <= 1; m++) {
27246 GemmMicrokernelTester()
27247 .mr(1)
27248 .nr(4)
27249 .kr(8)
27250 .sr(1)
27251 .m(m)
27252 .n(n)
27253 .k(k)
27254 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027255 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027256 }
27257 }
27258 }
27259 }
27260
Marat Dukhandfc2db02021-08-08 21:19:07 -070027261 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, n_div_4) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027262 for (uint32_t n = 8; n <= 12; n += 4) {
27263 for (size_t k = 1; k <= 40; k += 9) {
27264 GemmMicrokernelTester()
27265 .mr(1)
27266 .nr(4)
27267 .kr(8)
27268 .sr(1)
27269 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027270 .n(n)
Marat Dukhan43bee052021-07-14 20:57:18 -070027271 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027272 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027273 }
27274 }
27275 }
27276
Marat Dukhandfc2db02021-08-08 21:19:07 -070027277 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, n_div_4_strided_cn) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027278 for (uint32_t n = 8; n <= 12; n += 4) {
27279 for (size_t k = 1; k <= 40; k += 9) {
27280 GemmMicrokernelTester()
27281 .mr(1)
27282 .nr(4)
27283 .kr(8)
27284 .sr(1)
27285 .m(1)
27286 .n(n)
27287 .k(k)
27288 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027289 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027290 }
27291 }
27292 }
27293
Marat Dukhandfc2db02021-08-08 21:19:07 -070027294 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, n_div_4_subtile) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027295 for (uint32_t n = 8; n <= 12; n += 4) {
27296 for (size_t k = 1; k <= 40; k += 9) {
27297 for (uint32_t m = 1; m <= 1; m++) {
27298 GemmMicrokernelTester()
27299 .mr(1)
27300 .nr(4)
27301 .kr(8)
27302 .sr(1)
27303 .m(m)
27304 .n(n)
27305 .k(k)
27306 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027307 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027308 }
27309 }
27310 }
27311 }
27312
Marat Dukhandfc2db02021-08-08 21:19:07 -070027313 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, small_kernel) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027314 for (size_t k = 1; k <= 40; k += 9) {
27315 GemmMicrokernelTester()
27316 .mr(1)
27317 .nr(4)
27318 .kr(8)
27319 .sr(1)
27320 .m(1)
27321 .n(4)
27322 .k(k)
27323 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080027324 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027325 }
27326 }
27327
Marat Dukhandfc2db02021-08-08 21:19:07 -070027328 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, small_kernel_subtile) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027329 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027330 for (uint32_t n = 1; n <= 4; n++) {
27331 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027332 GemmMicrokernelTester()
27333 .mr(1)
27334 .nr(4)
27335 .kr(8)
27336 .sr(1)
27337 .m(m)
27338 .n(n)
27339 .k(k)
27340 .ks(3)
27341 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027342 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027343 }
27344 }
27345 }
27346 }
27347
Marat Dukhandfc2db02021-08-08 21:19:07 -070027348 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, n_gt_4_small_kernel) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027349 for (uint32_t n = 5; n < 8; n++) {
27350 for (size_t k = 1; k <= 40; k += 9) {
27351 GemmMicrokernelTester()
27352 .mr(1)
27353 .nr(4)
27354 .kr(8)
27355 .sr(1)
27356 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027357 .n(n)
Marat Dukhan43bee052021-07-14 20:57:18 -070027358 .k(k)
27359 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080027360 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027361 }
27362 }
27363 }
27364
Marat Dukhandfc2db02021-08-08 21:19:07 -070027365 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, n_div_4_small_kernel) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027366 for (uint32_t n = 8; n <= 12; n += 4) {
27367 for (size_t k = 1; k <= 40; k += 9) {
27368 GemmMicrokernelTester()
27369 .mr(1)
27370 .nr(4)
27371 .kr(8)
27372 .sr(1)
27373 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027374 .n(n)
Marat Dukhan43bee052021-07-14 20:57:18 -070027375 .k(k)
27376 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080027377 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027378 }
27379 }
27380 }
27381
Marat Dukhandfc2db02021-08-08 21:19:07 -070027382 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, strided_cm_subtile) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027383 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027384 for (uint32_t n = 1; n <= 4; n++) {
27385 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027386 GemmMicrokernelTester()
27387 .mr(1)
27388 .nr(4)
27389 .kr(8)
27390 .sr(1)
27391 .m(m)
27392 .n(n)
27393 .k(k)
27394 .cm_stride(7)
27395 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027396 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027397 }
27398 }
27399 }
27400 }
27401
Marat Dukhandfc2db02021-08-08 21:19:07 -070027402 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, a_offset) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027403 for (size_t k = 1; k <= 40; k += 9) {
27404 GemmMicrokernelTester()
27405 .mr(1)
27406 .nr(4)
27407 .kr(8)
27408 .sr(1)
27409 .m(1)
27410 .n(4)
27411 .k(k)
27412 .ks(3)
27413 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080027414 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027415 }
27416 }
27417
Marat Dukhandfc2db02021-08-08 21:19:07 -070027418 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027419 for (size_t k = 1; k <= 40; k += 9) {
27420 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027421 GemmMicrokernelTester()
27422 .mr(1)
27423 .nr(4)
27424 .kr(8)
27425 .sr(1)
27426 .m(1)
27427 .n(4)
27428 .k(k)
27429 .ks(3)
27430 .a_offset(43)
27431 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080027432 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027433 }
27434 }
27435 }
27436
Marat Dukhandfc2db02021-08-08 21:19:07 -070027437 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, qmin) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027438 GemmMicrokernelTester()
27439 .mr(1)
27440 .nr(4)
27441 .kr(8)
27442 .sr(1)
27443 .m(1)
27444 .n(4)
27445 .k(8)
27446 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027447 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027448 }
27449
Marat Dukhandfc2db02021-08-08 21:19:07 -070027450 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, qmax) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027451 GemmMicrokernelTester()
27452 .mr(1)
27453 .nr(4)
27454 .kr(8)
27455 .sr(1)
27456 .m(1)
27457 .n(4)
27458 .k(8)
27459 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027460 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027461 }
27462
Marat Dukhandfc2db02021-08-08 21:19:07 -070027463 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, strided_cm) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027464 GemmMicrokernelTester()
27465 .mr(1)
27466 .nr(4)
27467 .kr(8)
27468 .sr(1)
27469 .m(1)
27470 .n(4)
27471 .k(8)
27472 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027473 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027474 }
27475
Marat Dukhandfc2db02021-08-08 21:19:07 -070027476 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, no_a_zero_point) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027477 for (size_t k = 1; k <= 40; k += 9) {
27478 GemmMicrokernelTester()
27479 .mr(1)
27480 .nr(4)
27481 .kr(8)
27482 .sr(1)
27483 .m(1)
27484 .n(4)
27485 .k(k)
27486 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080027487 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027488 }
27489 }
27490
Marat Dukhandfc2db02021-08-08 21:19:07 -070027491 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, no_b_zero_point) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027492 for (size_t k = 1; k <= 40; k += 9) {
27493 GemmMicrokernelTester()
27494 .mr(1)
27495 .nr(4)
27496 .kr(8)
27497 .sr(1)
27498 .m(1)
27499 .n(4)
27500 .k(k)
27501 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080027502 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027503 }
27504 }
27505
Marat Dukhandfc2db02021-08-08 21:19:07 -070027506 TEST(QU8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_MUL32_LD128, no_zero_point) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027507 for (size_t k = 1; k <= 40; k += 9) {
27508 GemmMicrokernelTester()
27509 .mr(1)
27510 .nr(4)
27511 .kr(8)
27512 .sr(1)
27513 .m(1)
27514 .n(4)
27515 .k(k)
27516 .a_zero_point(0)
27517 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080027518 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027519 }
27520 }
Marat Dukhan4c617792021-12-21 15:47:58 -080027521#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan43bee052021-07-14 20:57:18 -070027522
27523
Marat Dukhan4c617792021-12-21 15:47:58 -080027524#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhandfc2db02021-08-08 21:19:07 -070027525 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, k_eq_8) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027526 GemmMicrokernelTester()
27527 .mr(3)
27528 .nr(4)
27529 .kr(8)
27530 .sr(1)
27531 .m(3)
27532 .n(4)
27533 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080027534 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027535 }
27536
Marat Dukhandfc2db02021-08-08 21:19:07 -070027537 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, strided_cn) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027538 GemmMicrokernelTester()
27539 .mr(3)
27540 .nr(4)
27541 .kr(8)
27542 .sr(1)
27543 .m(3)
27544 .n(4)
27545 .k(8)
27546 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027547 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027548 }
27549
Marat Dukhandfc2db02021-08-08 21:19:07 -070027550 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, k_eq_8_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027551 for (uint32_t n = 1; n <= 4; n++) {
27552 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027553 GemmMicrokernelTester()
27554 .mr(3)
27555 .nr(4)
27556 .kr(8)
27557 .sr(1)
27558 .m(m)
27559 .n(n)
27560 .k(8)
27561 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027562 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027563 }
27564 }
27565 }
27566
Marat Dukhandfc2db02021-08-08 21:19:07 -070027567 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, k_eq_8_subtile_m) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027568 for (uint32_t m = 1; m <= 3; m++) {
27569 GemmMicrokernelTester()
27570 .mr(3)
27571 .nr(4)
27572 .kr(8)
27573 .sr(1)
27574 .m(m)
27575 .n(4)
27576 .k(8)
27577 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027578 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027579 }
27580 }
27581
Marat Dukhandfc2db02021-08-08 21:19:07 -070027582 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, k_eq_8_subtile_n) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027583 for (uint32_t n = 1; n <= 4; n++) {
27584 GemmMicrokernelTester()
27585 .mr(3)
27586 .nr(4)
27587 .kr(8)
27588 .sr(1)
27589 .m(3)
27590 .n(n)
27591 .k(8)
27592 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027593 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027594 }
27595 }
27596
Marat Dukhandfc2db02021-08-08 21:19:07 -070027597 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, k_lt_8) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027598 for (size_t k = 1; k < 8; k++) {
27599 GemmMicrokernelTester()
27600 .mr(3)
27601 .nr(4)
27602 .kr(8)
27603 .sr(1)
27604 .m(3)
27605 .n(4)
27606 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027607 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027608 }
27609 }
27610
Marat Dukhandfc2db02021-08-08 21:19:07 -070027611 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, k_lt_8_subtile) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027612 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027613 for (uint32_t n = 1; n <= 4; n++) {
27614 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027615 GemmMicrokernelTester()
27616 .mr(3)
27617 .nr(4)
27618 .kr(8)
27619 .sr(1)
27620 .m(m)
27621 .n(n)
27622 .k(k)
27623 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027624 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027625 }
27626 }
27627 }
27628 }
27629
Marat Dukhandfc2db02021-08-08 21:19:07 -070027630 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, k_gt_8) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027631 for (size_t k = 9; k < 16; k++) {
27632 GemmMicrokernelTester()
27633 .mr(3)
27634 .nr(4)
27635 .kr(8)
27636 .sr(1)
27637 .m(3)
27638 .n(4)
27639 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027640 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027641 }
27642 }
27643
Marat Dukhandfc2db02021-08-08 21:19:07 -070027644 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, k_gt_8_subtile) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027645 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027646 for (uint32_t n = 1; n <= 4; n++) {
27647 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027648 GemmMicrokernelTester()
27649 .mr(3)
27650 .nr(4)
27651 .kr(8)
27652 .sr(1)
27653 .m(m)
27654 .n(n)
27655 .k(k)
27656 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027657 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027658 }
27659 }
27660 }
27661 }
27662
Marat Dukhandfc2db02021-08-08 21:19:07 -070027663 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, k_div_8) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027664 for (size_t k = 16; k <= 80; k += 8) {
27665 GemmMicrokernelTester()
27666 .mr(3)
27667 .nr(4)
27668 .kr(8)
27669 .sr(1)
27670 .m(3)
27671 .n(4)
27672 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027673 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027674 }
27675 }
27676
Marat Dukhandfc2db02021-08-08 21:19:07 -070027677 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, k_div_8_subtile) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027678 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027679 for (uint32_t n = 1; n <= 4; n++) {
27680 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027681 GemmMicrokernelTester()
27682 .mr(3)
27683 .nr(4)
27684 .kr(8)
27685 .sr(1)
27686 .m(m)
27687 .n(n)
27688 .k(k)
27689 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027690 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027691 }
27692 }
27693 }
27694 }
27695
Marat Dukhandfc2db02021-08-08 21:19:07 -070027696 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, n_gt_4) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027697 for (uint32_t n = 5; n < 8; n++) {
27698 for (size_t k = 1; k <= 40; k += 9) {
27699 GemmMicrokernelTester()
27700 .mr(3)
27701 .nr(4)
27702 .kr(8)
27703 .sr(1)
27704 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027705 .n(n)
Marat Dukhan43bee052021-07-14 20:57:18 -070027706 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027707 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027708 }
27709 }
27710 }
27711
Marat Dukhandfc2db02021-08-08 21:19:07 -070027712 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, n_gt_4_strided_cn) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027713 for (uint32_t n = 5; n < 8; n++) {
27714 for (size_t k = 1; k <= 40; k += 9) {
27715 GemmMicrokernelTester()
27716 .mr(3)
27717 .nr(4)
27718 .kr(8)
27719 .sr(1)
27720 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027721 .n(n)
Marat Dukhan43bee052021-07-14 20:57:18 -070027722 .k(k)
27723 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027724 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027725 }
27726 }
27727 }
27728
Marat Dukhandfc2db02021-08-08 21:19:07 -070027729 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, n_gt_4_subtile) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027730 for (uint32_t n = 5; n < 8; n++) {
27731 for (size_t k = 1; k <= 40; k += 9) {
27732 for (uint32_t m = 1; m <= 3; m++) {
27733 GemmMicrokernelTester()
27734 .mr(3)
27735 .nr(4)
27736 .kr(8)
27737 .sr(1)
27738 .m(m)
27739 .n(n)
27740 .k(k)
27741 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027742 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027743 }
27744 }
27745 }
27746 }
27747
Marat Dukhandfc2db02021-08-08 21:19:07 -070027748 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, n_div_4) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027749 for (uint32_t n = 8; n <= 12; n += 4) {
27750 for (size_t k = 1; k <= 40; k += 9) {
27751 GemmMicrokernelTester()
27752 .mr(3)
27753 .nr(4)
27754 .kr(8)
27755 .sr(1)
27756 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027757 .n(n)
Marat Dukhan43bee052021-07-14 20:57:18 -070027758 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080027759 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027760 }
27761 }
27762 }
27763
Marat Dukhandfc2db02021-08-08 21:19:07 -070027764 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, n_div_4_strided_cn) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027765 for (uint32_t n = 8; n <= 12; n += 4) {
27766 for (size_t k = 1; k <= 40; k += 9) {
27767 GemmMicrokernelTester()
27768 .mr(3)
27769 .nr(4)
27770 .kr(8)
27771 .sr(1)
27772 .m(3)
27773 .n(n)
27774 .k(k)
27775 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027776 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027777 }
27778 }
27779 }
27780
Marat Dukhandfc2db02021-08-08 21:19:07 -070027781 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, n_div_4_subtile) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027782 for (uint32_t n = 8; n <= 12; n += 4) {
27783 for (size_t k = 1; k <= 40; k += 9) {
27784 for (uint32_t m = 1; m <= 3; m++) {
27785 GemmMicrokernelTester()
27786 .mr(3)
27787 .nr(4)
27788 .kr(8)
27789 .sr(1)
27790 .m(m)
27791 .n(n)
27792 .k(k)
27793 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027794 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027795 }
27796 }
27797 }
27798 }
27799
Marat Dukhandfc2db02021-08-08 21:19:07 -070027800 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, small_kernel) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027801 for (size_t k = 1; k <= 40; k += 9) {
27802 GemmMicrokernelTester()
27803 .mr(3)
27804 .nr(4)
27805 .kr(8)
27806 .sr(1)
27807 .m(3)
27808 .n(4)
27809 .k(k)
27810 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080027811 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027812 }
27813 }
27814
Marat Dukhandfc2db02021-08-08 21:19:07 -070027815 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, small_kernel_subtile) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027816 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027817 for (uint32_t n = 1; n <= 4; n++) {
27818 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027819 GemmMicrokernelTester()
27820 .mr(3)
27821 .nr(4)
27822 .kr(8)
27823 .sr(1)
27824 .m(m)
27825 .n(n)
27826 .k(k)
27827 .ks(3)
27828 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027829 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027830 }
27831 }
27832 }
27833 }
27834
Marat Dukhandfc2db02021-08-08 21:19:07 -070027835 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, n_gt_4_small_kernel) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027836 for (uint32_t n = 5; n < 8; n++) {
27837 for (size_t k = 1; k <= 40; k += 9) {
27838 GemmMicrokernelTester()
27839 .mr(3)
27840 .nr(4)
27841 .kr(8)
27842 .sr(1)
27843 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027844 .n(n)
Marat Dukhan43bee052021-07-14 20:57:18 -070027845 .k(k)
27846 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080027847 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027848 }
27849 }
27850 }
27851
Marat Dukhandfc2db02021-08-08 21:19:07 -070027852 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, n_div_4_small_kernel) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027853 for (uint32_t n = 8; n <= 12; n += 4) {
27854 for (size_t k = 1; k <= 40; k += 9) {
27855 GemmMicrokernelTester()
27856 .mr(3)
27857 .nr(4)
27858 .kr(8)
27859 .sr(1)
27860 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080027861 .n(n)
Marat Dukhan43bee052021-07-14 20:57:18 -070027862 .k(k)
27863 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080027864 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027865 }
27866 }
27867 }
27868
Marat Dukhandfc2db02021-08-08 21:19:07 -070027869 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, strided_cm_subtile) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027870 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027871 for (uint32_t n = 1; n <= 4; n++) {
27872 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027873 GemmMicrokernelTester()
27874 .mr(3)
27875 .nr(4)
27876 .kr(8)
27877 .sr(1)
27878 .m(m)
27879 .n(n)
27880 .k(k)
27881 .cm_stride(7)
27882 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080027883 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027884 }
27885 }
27886 }
27887 }
27888
Marat Dukhandfc2db02021-08-08 21:19:07 -070027889 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, a_offset) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027890 for (size_t k = 1; k <= 40; k += 9) {
27891 GemmMicrokernelTester()
27892 .mr(3)
27893 .nr(4)
27894 .kr(8)
27895 .sr(1)
27896 .m(3)
27897 .n(4)
27898 .k(k)
27899 .ks(3)
27900 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080027901 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027902 }
27903 }
27904
Marat Dukhandfc2db02021-08-08 21:19:07 -070027905 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080027906 for (size_t k = 1; k <= 40; k += 9) {
27907 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027908 GemmMicrokernelTester()
27909 .mr(3)
27910 .nr(4)
27911 .kr(8)
27912 .sr(1)
27913 .m(3)
27914 .n(4)
27915 .k(k)
27916 .ks(3)
27917 .a_offset(127)
27918 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080027919 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027920 }
27921 }
27922 }
27923
Marat Dukhandfc2db02021-08-08 21:19:07 -070027924 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, qmin) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027925 GemmMicrokernelTester()
27926 .mr(3)
27927 .nr(4)
27928 .kr(8)
27929 .sr(1)
27930 .m(3)
27931 .n(4)
27932 .k(8)
27933 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027934 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027935 }
27936
Marat Dukhandfc2db02021-08-08 21:19:07 -070027937 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, qmax) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027938 GemmMicrokernelTester()
27939 .mr(3)
27940 .nr(4)
27941 .kr(8)
27942 .sr(1)
27943 .m(3)
27944 .n(4)
27945 .k(8)
27946 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080027947 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027948 }
27949
Marat Dukhandfc2db02021-08-08 21:19:07 -070027950 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, strided_cm) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027951 GemmMicrokernelTester()
27952 .mr(3)
27953 .nr(4)
27954 .kr(8)
27955 .sr(1)
27956 .m(3)
27957 .n(4)
27958 .k(8)
27959 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080027960 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027961 }
27962
Marat Dukhandfc2db02021-08-08 21:19:07 -070027963 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, no_a_zero_point) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027964 for (size_t k = 1; k <= 40; k += 9) {
27965 GemmMicrokernelTester()
27966 .mr(3)
27967 .nr(4)
27968 .kr(8)
27969 .sr(1)
27970 .m(3)
27971 .n(4)
27972 .k(k)
27973 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080027974 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027975 }
27976 }
27977
Marat Dukhandfc2db02021-08-08 21:19:07 -070027978 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, no_b_zero_point) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027979 for (size_t k = 1; k <= 40; k += 9) {
27980 GemmMicrokernelTester()
27981 .mr(3)
27982 .nr(4)
27983 .kr(8)
27984 .sr(1)
27985 .m(3)
27986 .n(4)
27987 .k(k)
27988 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080027989 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070027990 }
27991 }
27992
Marat Dukhandfc2db02021-08-08 21:19:07 -070027993 TEST(QU8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_MUL32_LD128, no_zero_point) {
Marat Dukhan43bee052021-07-14 20:57:18 -070027994 for (size_t k = 1; k <= 40; k += 9) {
27995 GemmMicrokernelTester()
27996 .mr(3)
27997 .nr(4)
27998 .kr(8)
27999 .sr(1)
28000 .m(3)
28001 .n(4)
28002 .k(k)
28003 .a_zero_point(0)
28004 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080028005 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128, xnn_init_qu8_conv_minmax_fp32_wasmsimd_params, xnn_qu8_requantize_fp32);
Marat Dukhan43bee052021-07-14 20:57:18 -070028006 }
28007 }
Marat Dukhan4c617792021-12-21 15:47:58 -080028008#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan927d4742021-07-15 13:42:49 -070028009
28010
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028011#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
28012 TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1) {
28013 GemmMicrokernelTester()
28014 .mr(1)
28015 .nr(2)
28016 .kr(1)
28017 .sr(1)
28018 .m(1)
28019 .n(2)
28020 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028021 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028022 }
28023
28024 TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cn) {
28025 GemmMicrokernelTester()
28026 .mr(1)
28027 .nr(2)
28028 .kr(1)
28029 .sr(1)
28030 .m(1)
28031 .n(2)
28032 .k(1)
28033 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080028034 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028035 }
28036
28037 TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028038 for (uint32_t n = 1; n <= 2; n++) {
28039 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028040 GemmMicrokernelTester()
28041 .mr(1)
28042 .nr(2)
28043 .kr(1)
28044 .sr(1)
28045 .m(m)
28046 .n(n)
28047 .k(1)
28048 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028049 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028050 }
28051 }
28052 }
28053
28054 TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile_m) {
28055 for (uint32_t m = 1; m <= 1; m++) {
28056 GemmMicrokernelTester()
28057 .mr(1)
28058 .nr(2)
28059 .kr(1)
28060 .sr(1)
28061 .m(m)
28062 .n(2)
28063 .k(1)
28064 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028065 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028066 }
28067 }
28068
28069 TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile_n) {
28070 for (uint32_t n = 1; n <= 2; n++) {
28071 GemmMicrokernelTester()
28072 .mr(1)
28073 .nr(2)
28074 .kr(1)
28075 .sr(1)
28076 .m(1)
28077 .n(n)
28078 .k(1)
28079 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028080 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028081 }
28082 }
28083
28084 TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_gt_1) {
28085 for (size_t k = 2; k < 10; k++) {
28086 GemmMicrokernelTester()
28087 .mr(1)
28088 .nr(2)
28089 .kr(1)
28090 .sr(1)
28091 .m(1)
28092 .n(2)
28093 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028094 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028095 }
28096 }
28097
28098 TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_gt_1_subtile) {
28099 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028100 for (uint32_t n = 1; n <= 2; n++) {
28101 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028102 GemmMicrokernelTester()
28103 .mr(1)
28104 .nr(2)
28105 .kr(1)
28106 .sr(1)
28107 .m(m)
28108 .n(n)
28109 .k(k)
28110 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028111 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028112 }
28113 }
28114 }
28115 }
28116
28117 TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2) {
28118 for (uint32_t n = 3; n < 4; n++) {
28119 for (size_t k = 1; k <= 5; k += 2) {
28120 GemmMicrokernelTester()
28121 .mr(1)
28122 .nr(2)
28123 .kr(1)
28124 .sr(1)
28125 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028126 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028127 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028128 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028129 }
28130 }
28131 }
28132
28133 TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_strided_cn) {
28134 for (uint32_t n = 3; n < 4; n++) {
28135 for (size_t k = 1; k <= 5; k += 2) {
28136 GemmMicrokernelTester()
28137 .mr(1)
28138 .nr(2)
28139 .kr(1)
28140 .sr(1)
28141 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028142 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028143 .k(k)
28144 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080028145 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028146 }
28147 }
28148 }
28149
28150 TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_subtile) {
28151 for (uint32_t n = 3; n < 4; n++) {
28152 for (size_t k = 1; k <= 5; k += 2) {
28153 for (uint32_t m = 1; m <= 1; m++) {
28154 GemmMicrokernelTester()
28155 .mr(1)
28156 .nr(2)
28157 .kr(1)
28158 .sr(1)
28159 .m(m)
28160 .n(n)
28161 .k(k)
28162 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028163 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028164 }
28165 }
28166 }
28167 }
28168
28169 TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2) {
28170 for (uint32_t n = 4; n <= 6; n += 2) {
28171 for (size_t k = 1; k <= 5; k += 2) {
28172 GemmMicrokernelTester()
28173 .mr(1)
28174 .nr(2)
28175 .kr(1)
28176 .sr(1)
28177 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028178 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028179 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028180 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028181 }
28182 }
28183 }
28184
28185 TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_strided_cn) {
28186 for (uint32_t n = 4; n <= 6; n += 2) {
28187 for (size_t k = 1; k <= 5; k += 2) {
28188 GemmMicrokernelTester()
28189 .mr(1)
28190 .nr(2)
28191 .kr(1)
28192 .sr(1)
28193 .m(1)
28194 .n(n)
28195 .k(k)
28196 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080028197 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028198 }
28199 }
28200 }
28201
28202 TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_subtile) {
28203 for (uint32_t n = 4; n <= 6; n += 2) {
28204 for (size_t k = 1; k <= 5; k += 2) {
28205 for (uint32_t m = 1; m <= 1; m++) {
28206 GemmMicrokernelTester()
28207 .mr(1)
28208 .nr(2)
28209 .kr(1)
28210 .sr(1)
28211 .m(m)
28212 .n(n)
28213 .k(k)
28214 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028215 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028216 }
28217 }
28218 }
28219 }
28220
28221 TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, small_kernel) {
28222 for (size_t k = 1; k <= 5; k += 2) {
28223 GemmMicrokernelTester()
28224 .mr(1)
28225 .nr(2)
28226 .kr(1)
28227 .sr(1)
28228 .m(1)
28229 .n(2)
28230 .k(k)
28231 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080028232 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028233 }
28234 }
28235
28236 TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, small_kernel_subtile) {
28237 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028238 for (uint32_t n = 1; n <= 2; n++) {
28239 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028240 GemmMicrokernelTester()
28241 .mr(1)
28242 .nr(2)
28243 .kr(1)
28244 .sr(1)
28245 .m(m)
28246 .n(n)
28247 .k(k)
28248 .ks(3)
28249 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028250 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028251 }
28252 }
28253 }
28254 }
28255
28256 TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_small_kernel) {
28257 for (uint32_t n = 3; n < 4; n++) {
28258 for (size_t k = 1; k <= 5; k += 2) {
28259 GemmMicrokernelTester()
28260 .mr(1)
28261 .nr(2)
28262 .kr(1)
28263 .sr(1)
28264 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028265 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028266 .k(k)
28267 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080028268 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028269 }
28270 }
28271 }
28272
28273 TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_small_kernel) {
28274 for (uint32_t n = 4; n <= 6; n += 2) {
28275 for (size_t k = 1; k <= 5; k += 2) {
28276 GemmMicrokernelTester()
28277 .mr(1)
28278 .nr(2)
28279 .kr(1)
28280 .sr(1)
28281 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028282 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028283 .k(k)
28284 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080028285 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028286 }
28287 }
28288 }
28289
28290 TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cm_subtile) {
28291 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028292 for (uint32_t n = 1; n <= 2; n++) {
28293 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028294 GemmMicrokernelTester()
28295 .mr(1)
28296 .nr(2)
28297 .kr(1)
28298 .sr(1)
28299 .m(m)
28300 .n(n)
28301 .k(k)
28302 .cm_stride(5)
28303 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028304 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028305 }
28306 }
28307 }
28308 }
28309
28310 TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, a_offset) {
28311 for (size_t k = 1; k <= 5; k += 2) {
28312 GemmMicrokernelTester()
28313 .mr(1)
28314 .nr(2)
28315 .kr(1)
28316 .sr(1)
28317 .m(1)
28318 .n(2)
28319 .k(k)
28320 .ks(3)
28321 .a_offset(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080028322 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028323 }
28324 }
28325
28326 TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028327 for (size_t k = 1; k <= 5; k += 2) {
28328 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028329 GemmMicrokernelTester()
28330 .mr(1)
28331 .nr(2)
28332 .kr(1)
28333 .sr(1)
28334 .m(1)
28335 .n(2)
28336 .k(k)
28337 .ks(3)
28338 .a_offset(7)
28339 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080028340 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028341 }
28342 }
28343 }
28344
28345 TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, qmin) {
28346 GemmMicrokernelTester()
28347 .mr(1)
28348 .nr(2)
28349 .kr(1)
28350 .sr(1)
28351 .m(1)
28352 .n(2)
28353 .k(1)
28354 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080028355 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028356 }
28357
28358 TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, qmax) {
28359 GemmMicrokernelTester()
28360 .mr(1)
28361 .nr(2)
28362 .kr(1)
28363 .sr(1)
28364 .m(1)
28365 .n(2)
28366 .k(1)
28367 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080028368 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028369 }
28370
28371 TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cm) {
28372 GemmMicrokernelTester()
28373 .mr(1)
28374 .nr(2)
28375 .kr(1)
28376 .sr(1)
28377 .m(1)
28378 .n(2)
28379 .k(1)
28380 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080028381 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028382 }
28383
28384 TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, no_a_zero_point) {
28385 for (size_t k = 1; k <= 5; k += 2) {
28386 GemmMicrokernelTester()
28387 .mr(1)
28388 .nr(2)
28389 .kr(1)
28390 .sr(1)
28391 .m(1)
28392 .n(2)
28393 .k(k)
28394 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080028395 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028396 }
28397 }
28398
28399 TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, no_b_zero_point) {
28400 for (size_t k = 1; k <= 5; k += 2) {
28401 GemmMicrokernelTester()
28402 .mr(1)
28403 .nr(2)
28404 .kr(1)
28405 .sr(1)
28406 .m(1)
28407 .n(2)
28408 .k(k)
28409 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080028410 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028411 }
28412 }
28413
28414 TEST(QU8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, no_zero_point) {
28415 for (size_t k = 1; k <= 5; k += 2) {
28416 GemmMicrokernelTester()
28417 .mr(1)
28418 .nr(2)
28419 .kr(1)
28420 .sr(1)
28421 .m(1)
28422 .n(2)
28423 .k(k)
28424 .a_zero_point(0)
28425 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080028426 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028427 }
28428 }
28429#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
28430
28431
28432#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
28433 TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1) {
28434 GemmMicrokernelTester()
28435 .mr(2)
28436 .nr(2)
28437 .kr(1)
28438 .sr(1)
28439 .m(2)
28440 .n(2)
28441 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028442 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028443 }
28444
28445 TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cn) {
28446 GemmMicrokernelTester()
28447 .mr(2)
28448 .nr(2)
28449 .kr(1)
28450 .sr(1)
28451 .m(2)
28452 .n(2)
28453 .k(1)
28454 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080028455 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028456 }
28457
28458 TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028459 for (uint32_t n = 1; n <= 2; n++) {
28460 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028461 GemmMicrokernelTester()
28462 .mr(2)
28463 .nr(2)
28464 .kr(1)
28465 .sr(1)
28466 .m(m)
28467 .n(n)
28468 .k(1)
28469 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028470 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028471 }
28472 }
28473 }
28474
28475 TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile_m) {
28476 for (uint32_t m = 1; m <= 2; m++) {
28477 GemmMicrokernelTester()
28478 .mr(2)
28479 .nr(2)
28480 .kr(1)
28481 .sr(1)
28482 .m(m)
28483 .n(2)
28484 .k(1)
28485 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028486 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028487 }
28488 }
28489
28490 TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile_n) {
28491 for (uint32_t n = 1; n <= 2; n++) {
28492 GemmMicrokernelTester()
28493 .mr(2)
28494 .nr(2)
28495 .kr(1)
28496 .sr(1)
28497 .m(2)
28498 .n(n)
28499 .k(1)
28500 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028501 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028502 }
28503 }
28504
28505 TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_gt_1) {
28506 for (size_t k = 2; k < 10; k++) {
28507 GemmMicrokernelTester()
28508 .mr(2)
28509 .nr(2)
28510 .kr(1)
28511 .sr(1)
28512 .m(2)
28513 .n(2)
28514 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028515 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028516 }
28517 }
28518
28519 TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_gt_1_subtile) {
28520 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028521 for (uint32_t n = 1; n <= 2; n++) {
28522 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028523 GemmMicrokernelTester()
28524 .mr(2)
28525 .nr(2)
28526 .kr(1)
28527 .sr(1)
28528 .m(m)
28529 .n(n)
28530 .k(k)
28531 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028532 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028533 }
28534 }
28535 }
28536 }
28537
28538 TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2) {
28539 for (uint32_t n = 3; n < 4; n++) {
28540 for (size_t k = 1; k <= 5; k += 2) {
28541 GemmMicrokernelTester()
28542 .mr(2)
28543 .nr(2)
28544 .kr(1)
28545 .sr(1)
28546 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028547 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028548 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028549 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028550 }
28551 }
28552 }
28553
28554 TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_strided_cn) {
28555 for (uint32_t n = 3; n < 4; n++) {
28556 for (size_t k = 1; k <= 5; k += 2) {
28557 GemmMicrokernelTester()
28558 .mr(2)
28559 .nr(2)
28560 .kr(1)
28561 .sr(1)
28562 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028563 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028564 .k(k)
28565 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080028566 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028567 }
28568 }
28569 }
28570
28571 TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_subtile) {
28572 for (uint32_t n = 3; n < 4; n++) {
28573 for (size_t k = 1; k <= 5; k += 2) {
28574 for (uint32_t m = 1; m <= 2; m++) {
28575 GemmMicrokernelTester()
28576 .mr(2)
28577 .nr(2)
28578 .kr(1)
28579 .sr(1)
28580 .m(m)
28581 .n(n)
28582 .k(k)
28583 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028584 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028585 }
28586 }
28587 }
28588 }
28589
28590 TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2) {
28591 for (uint32_t n = 4; n <= 6; n += 2) {
28592 for (size_t k = 1; k <= 5; k += 2) {
28593 GemmMicrokernelTester()
28594 .mr(2)
28595 .nr(2)
28596 .kr(1)
28597 .sr(1)
28598 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028599 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028600 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028601 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028602 }
28603 }
28604 }
28605
28606 TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_strided_cn) {
28607 for (uint32_t n = 4; n <= 6; n += 2) {
28608 for (size_t k = 1; k <= 5; k += 2) {
28609 GemmMicrokernelTester()
28610 .mr(2)
28611 .nr(2)
28612 .kr(1)
28613 .sr(1)
28614 .m(2)
28615 .n(n)
28616 .k(k)
28617 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080028618 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028619 }
28620 }
28621 }
28622
28623 TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_subtile) {
28624 for (uint32_t n = 4; n <= 6; n += 2) {
28625 for (size_t k = 1; k <= 5; k += 2) {
28626 for (uint32_t m = 1; m <= 2; m++) {
28627 GemmMicrokernelTester()
28628 .mr(2)
28629 .nr(2)
28630 .kr(1)
28631 .sr(1)
28632 .m(m)
28633 .n(n)
28634 .k(k)
28635 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028636 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028637 }
28638 }
28639 }
28640 }
28641
28642 TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, small_kernel) {
28643 for (size_t k = 1; k <= 5; k += 2) {
28644 GemmMicrokernelTester()
28645 .mr(2)
28646 .nr(2)
28647 .kr(1)
28648 .sr(1)
28649 .m(2)
28650 .n(2)
28651 .k(k)
28652 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080028653 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028654 }
28655 }
28656
28657 TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, small_kernel_subtile) {
28658 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028659 for (uint32_t n = 1; n <= 2; n++) {
28660 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028661 GemmMicrokernelTester()
28662 .mr(2)
28663 .nr(2)
28664 .kr(1)
28665 .sr(1)
28666 .m(m)
28667 .n(n)
28668 .k(k)
28669 .ks(3)
28670 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028671 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028672 }
28673 }
28674 }
28675 }
28676
28677 TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_small_kernel) {
28678 for (uint32_t n = 3; n < 4; n++) {
28679 for (size_t k = 1; k <= 5; k += 2) {
28680 GemmMicrokernelTester()
28681 .mr(2)
28682 .nr(2)
28683 .kr(1)
28684 .sr(1)
28685 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028686 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028687 .k(k)
28688 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080028689 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028690 }
28691 }
28692 }
28693
28694 TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_small_kernel) {
28695 for (uint32_t n = 4; n <= 6; n += 2) {
28696 for (size_t k = 1; k <= 5; k += 2) {
28697 GemmMicrokernelTester()
28698 .mr(2)
28699 .nr(2)
28700 .kr(1)
28701 .sr(1)
28702 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028703 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028704 .k(k)
28705 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080028706 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028707 }
28708 }
28709 }
28710
28711 TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cm_subtile) {
28712 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028713 for (uint32_t n = 1; n <= 2; n++) {
28714 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028715 GemmMicrokernelTester()
28716 .mr(2)
28717 .nr(2)
28718 .kr(1)
28719 .sr(1)
28720 .m(m)
28721 .n(n)
28722 .k(k)
28723 .cm_stride(5)
28724 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028725 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028726 }
28727 }
28728 }
28729 }
28730
28731 TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, a_offset) {
28732 for (size_t k = 1; k <= 5; k += 2) {
28733 GemmMicrokernelTester()
28734 .mr(2)
28735 .nr(2)
28736 .kr(1)
28737 .sr(1)
28738 .m(2)
28739 .n(2)
28740 .k(k)
28741 .ks(3)
28742 .a_offset(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080028743 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028744 }
28745 }
28746
28747 TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028748 for (size_t k = 1; k <= 5; k += 2) {
28749 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028750 GemmMicrokernelTester()
28751 .mr(2)
28752 .nr(2)
28753 .kr(1)
28754 .sr(1)
28755 .m(2)
28756 .n(2)
28757 .k(k)
28758 .ks(3)
28759 .a_offset(13)
28760 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080028761 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028762 }
28763 }
28764 }
28765
28766 TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, qmin) {
28767 GemmMicrokernelTester()
28768 .mr(2)
28769 .nr(2)
28770 .kr(1)
28771 .sr(1)
28772 .m(2)
28773 .n(2)
28774 .k(1)
28775 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080028776 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028777 }
28778
28779 TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, qmax) {
28780 GemmMicrokernelTester()
28781 .mr(2)
28782 .nr(2)
28783 .kr(1)
28784 .sr(1)
28785 .m(2)
28786 .n(2)
28787 .k(1)
28788 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080028789 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028790 }
28791
28792 TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cm) {
28793 GemmMicrokernelTester()
28794 .mr(2)
28795 .nr(2)
28796 .kr(1)
28797 .sr(1)
28798 .m(2)
28799 .n(2)
28800 .k(1)
28801 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080028802 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028803 }
28804
28805 TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, no_a_zero_point) {
28806 for (size_t k = 1; k <= 5; k += 2) {
28807 GemmMicrokernelTester()
28808 .mr(2)
28809 .nr(2)
28810 .kr(1)
28811 .sr(1)
28812 .m(2)
28813 .n(2)
28814 .k(k)
28815 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080028816 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028817 }
28818 }
28819
28820 TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, no_b_zero_point) {
28821 for (size_t k = 1; k <= 5; k += 2) {
28822 GemmMicrokernelTester()
28823 .mr(2)
28824 .nr(2)
28825 .kr(1)
28826 .sr(1)
28827 .m(2)
28828 .n(2)
28829 .k(k)
28830 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080028831 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028832 }
28833 }
28834
28835 TEST(QU8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, no_zero_point) {
28836 for (size_t k = 1; k <= 5; k += 2) {
28837 GemmMicrokernelTester()
28838 .mr(2)
28839 .nr(2)
28840 .kr(1)
28841 .sr(1)
28842 .m(2)
28843 .n(2)
28844 .k(k)
28845 .a_zero_point(0)
28846 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080028847 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028848 }
28849 }
28850#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
28851
28852
28853#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028854 TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1) {
28855 GemmMicrokernelTester()
28856 .mr(4)
28857 .nr(2)
28858 .kr(1)
28859 .sr(1)
28860 .m(4)
28861 .n(2)
28862 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028863 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028864 }
28865
28866 TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cn) {
28867 GemmMicrokernelTester()
28868 .mr(4)
28869 .nr(2)
28870 .kr(1)
28871 .sr(1)
28872 .m(4)
28873 .n(2)
28874 .k(1)
28875 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080028876 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028877 }
28878
28879 TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028880 for (uint32_t n = 1; n <= 2; n++) {
28881 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028882 GemmMicrokernelTester()
28883 .mr(4)
28884 .nr(2)
28885 .kr(1)
28886 .sr(1)
28887 .m(m)
28888 .n(n)
28889 .k(1)
28890 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028891 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028892 }
28893 }
28894 }
28895
28896 TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile_m) {
28897 for (uint32_t m = 1; m <= 4; m++) {
28898 GemmMicrokernelTester()
28899 .mr(4)
28900 .nr(2)
28901 .kr(1)
28902 .sr(1)
28903 .m(m)
28904 .n(2)
28905 .k(1)
28906 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028907 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028908 }
28909 }
28910
28911 TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile_n) {
28912 for (uint32_t n = 1; n <= 2; n++) {
28913 GemmMicrokernelTester()
28914 .mr(4)
28915 .nr(2)
28916 .kr(1)
28917 .sr(1)
28918 .m(4)
28919 .n(n)
28920 .k(1)
28921 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028922 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028923 }
28924 }
28925
28926 TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_gt_1) {
28927 for (size_t k = 2; k < 10; k++) {
28928 GemmMicrokernelTester()
28929 .mr(4)
28930 .nr(2)
28931 .kr(1)
28932 .sr(1)
28933 .m(4)
28934 .n(2)
28935 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028936 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028937 }
28938 }
28939
28940 TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_gt_1_subtile) {
28941 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080028942 for (uint32_t n = 1; n <= 2; n++) {
28943 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028944 GemmMicrokernelTester()
28945 .mr(4)
28946 .nr(2)
28947 .kr(1)
28948 .sr(1)
28949 .m(m)
28950 .n(n)
28951 .k(k)
28952 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080028953 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028954 }
28955 }
28956 }
28957 }
28958
28959 TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2) {
28960 for (uint32_t n = 3; n < 4; n++) {
28961 for (size_t k = 1; k <= 5; k += 2) {
28962 GemmMicrokernelTester()
28963 .mr(4)
28964 .nr(2)
28965 .kr(1)
28966 .sr(1)
28967 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028968 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028969 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080028970 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028971 }
28972 }
28973 }
28974
28975 TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_strided_cn) {
28976 for (uint32_t n = 3; n < 4; n++) {
28977 for (size_t k = 1; k <= 5; k += 2) {
28978 GemmMicrokernelTester()
28979 .mr(4)
28980 .nr(2)
28981 .kr(1)
28982 .sr(1)
28983 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080028984 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028985 .k(k)
28986 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080028987 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080028988 }
28989 }
28990 }
28991
28992 TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_subtile) {
28993 for (uint32_t n = 3; n < 4; n++) {
28994 for (size_t k = 1; k <= 5; k += 2) {
28995 for (uint32_t m = 1; m <= 4; m++) {
28996 GemmMicrokernelTester()
28997 .mr(4)
28998 .nr(2)
28999 .kr(1)
29000 .sr(1)
29001 .m(m)
29002 .n(n)
29003 .k(k)
29004 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029005 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029006 }
29007 }
29008 }
29009 }
29010
29011 TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2) {
29012 for (uint32_t n = 4; n <= 6; n += 2) {
29013 for (size_t k = 1; k <= 5; k += 2) {
29014 GemmMicrokernelTester()
29015 .mr(4)
29016 .nr(2)
29017 .kr(1)
29018 .sr(1)
29019 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029020 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029021 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029022 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029023 }
29024 }
29025 }
29026
29027 TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_strided_cn) {
29028 for (uint32_t n = 4; n <= 6; n += 2) {
29029 for (size_t k = 1; k <= 5; k += 2) {
29030 GemmMicrokernelTester()
29031 .mr(4)
29032 .nr(2)
29033 .kr(1)
29034 .sr(1)
29035 .m(4)
29036 .n(n)
29037 .k(k)
29038 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080029039 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029040 }
29041 }
29042 }
29043
29044 TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_subtile) {
29045 for (uint32_t n = 4; n <= 6; n += 2) {
29046 for (size_t k = 1; k <= 5; k += 2) {
29047 for (uint32_t m = 1; m <= 4; m++) {
29048 GemmMicrokernelTester()
29049 .mr(4)
29050 .nr(2)
29051 .kr(1)
29052 .sr(1)
29053 .m(m)
29054 .n(n)
29055 .k(k)
29056 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029057 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029058 }
29059 }
29060 }
29061 }
29062
29063 TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, small_kernel) {
29064 for (size_t k = 1; k <= 5; k += 2) {
29065 GemmMicrokernelTester()
29066 .mr(4)
29067 .nr(2)
29068 .kr(1)
29069 .sr(1)
29070 .m(4)
29071 .n(2)
29072 .k(k)
29073 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080029074 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029075 }
29076 }
29077
29078 TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, small_kernel_subtile) {
29079 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029080 for (uint32_t n = 1; n <= 2; n++) {
29081 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029082 GemmMicrokernelTester()
29083 .mr(4)
29084 .nr(2)
29085 .kr(1)
29086 .sr(1)
29087 .m(m)
29088 .n(n)
29089 .k(k)
29090 .ks(3)
29091 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029092 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029093 }
29094 }
29095 }
29096 }
29097
29098 TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_small_kernel) {
29099 for (uint32_t n = 3; n < 4; n++) {
29100 for (size_t k = 1; k <= 5; k += 2) {
29101 GemmMicrokernelTester()
29102 .mr(4)
29103 .nr(2)
29104 .kr(1)
29105 .sr(1)
29106 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029107 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029108 .k(k)
29109 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080029110 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029111 }
29112 }
29113 }
29114
29115 TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_small_kernel) {
29116 for (uint32_t n = 4; n <= 6; n += 2) {
29117 for (size_t k = 1; k <= 5; k += 2) {
29118 GemmMicrokernelTester()
29119 .mr(4)
29120 .nr(2)
29121 .kr(1)
29122 .sr(1)
29123 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029124 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029125 .k(k)
29126 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080029127 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029128 }
29129 }
29130 }
29131
29132 TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cm_subtile) {
29133 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029134 for (uint32_t n = 1; n <= 2; n++) {
29135 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029136 GemmMicrokernelTester()
29137 .mr(4)
29138 .nr(2)
29139 .kr(1)
29140 .sr(1)
29141 .m(m)
29142 .n(n)
29143 .k(k)
29144 .cm_stride(5)
29145 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029146 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029147 }
29148 }
29149 }
29150 }
29151
29152 TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, a_offset) {
29153 for (size_t k = 1; k <= 5; k += 2) {
29154 GemmMicrokernelTester()
29155 .mr(4)
29156 .nr(2)
29157 .kr(1)
29158 .sr(1)
29159 .m(4)
29160 .n(2)
29161 .k(k)
29162 .ks(3)
29163 .a_offset(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080029164 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029165 }
29166 }
29167
29168 TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029169 for (size_t k = 1; k <= 5; k += 2) {
29170 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029171 GemmMicrokernelTester()
29172 .mr(4)
29173 .nr(2)
29174 .kr(1)
29175 .sr(1)
29176 .m(4)
29177 .n(2)
29178 .k(k)
29179 .ks(3)
29180 .a_offset(23)
29181 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080029182 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029183 }
29184 }
29185 }
29186
29187 TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, qmin) {
29188 GemmMicrokernelTester()
29189 .mr(4)
29190 .nr(2)
29191 .kr(1)
29192 .sr(1)
29193 .m(4)
29194 .n(2)
29195 .k(1)
29196 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080029197 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029198 }
29199
29200 TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, qmax) {
29201 GemmMicrokernelTester()
29202 .mr(4)
29203 .nr(2)
29204 .kr(1)
29205 .sr(1)
29206 .m(4)
29207 .n(2)
29208 .k(1)
29209 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080029210 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029211 }
29212
29213 TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cm) {
29214 GemmMicrokernelTester()
29215 .mr(4)
29216 .nr(2)
29217 .kr(1)
29218 .sr(1)
29219 .m(4)
29220 .n(2)
29221 .k(1)
29222 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080029223 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029224 }
29225
29226 TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, no_a_zero_point) {
29227 for (size_t k = 1; k <= 5; k += 2) {
29228 GemmMicrokernelTester()
29229 .mr(4)
29230 .nr(2)
29231 .kr(1)
29232 .sr(1)
29233 .m(4)
29234 .n(2)
29235 .k(k)
29236 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080029237 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029238 }
29239 }
29240
29241 TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, no_b_zero_point) {
29242 for (size_t k = 1; k <= 5; k += 2) {
29243 GemmMicrokernelTester()
29244 .mr(4)
29245 .nr(2)
29246 .kr(1)
29247 .sr(1)
29248 .m(4)
29249 .n(2)
29250 .k(k)
29251 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080029252 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029253 }
29254 }
29255
29256 TEST(QU8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, no_zero_point) {
29257 for (size_t k = 1; k <= 5; k += 2) {
29258 GemmMicrokernelTester()
29259 .mr(4)
29260 .nr(2)
29261 .kr(1)
29262 .sr(1)
29263 .m(4)
29264 .n(2)
29265 .k(k)
29266 .a_zero_point(0)
29267 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080029268 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029269 }
29270 }
29271#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
29272
29273
29274#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Simplest 1x4 case: m = mr = 1, n = nr = 4, k = 1; no other tester setting is changed.
29275 TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1) {
29276 GemmMicrokernelTester()
29277 .mr(1)
29278 .nr(4)
29279 .kr(1)
29280 .sr(1)
29281 .m(1)
29282 .n(4)
29283 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029284 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029285 }
29286
// m = 1, n = 4, k = 1 with cn_stride(7), a value larger than nr = 4.
29287 TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cn) {
29288 GemmMicrokernelTester()
29289 .mr(1)
29290 .nr(4)
29291 .kr(1)
29292 .sr(1)
29293 .m(1)
29294 .n(4)
29295 .k(1)
29296 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029297 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029298 }
29299
// k = 1 for every (m, n) with 1 <= n <= 4 and 1 <= m <= 1 (the m loop runs once because mr = 1); iterations(1) is set for each pair.
29300 TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029301 for (uint32_t n = 1; n <= 4; n++) {
29302 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029303 GemmMicrokernelTester()
29304 .mr(1)
29305 .nr(4)
29306 .kr(1)
29307 .sr(1)
29308 .m(m)
29309 .n(n)
29310 .k(1)
29311 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029312 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029313 }
29314 }
29315 }
29316
// k = 1, n = nr = 4; m is swept over 1..1 (a single pass because mr = 1), with iterations(1).
29317 TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile_m) {
29318 for (uint32_t m = 1; m <= 1; m++) {
29319 GemmMicrokernelTester()
29320 .mr(1)
29321 .nr(4)
29322 .kr(1)
29323 .sr(1)
29324 .m(m)
29325 .n(4)
29326 .k(1)
29327 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029328 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029329 }
29330 }
29331
// k = 1, m = mr = 1; n is swept over 1..4 (up to nr), with iterations(1).
29332 TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile_n) {
29333 for (uint32_t n = 1; n <= 4; n++) {
29334 GemmMicrokernelTester()
29335 .mr(1)
29336 .nr(4)
29337 .kr(1)
29338 .sr(1)
29339 .m(1)
29340 .n(n)
29341 .k(1)
29342 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029343 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029344 }
29345 }
29346
// m = mr = 1, n = nr = 4; k is swept over 2..9.
29347 TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_gt_1) {
29348 for (size_t k = 2; k < 10; k++) {
29349 GemmMicrokernelTester()
29350 .mr(1)
29351 .nr(4)
29352 .kr(1)
29353 .sr(1)
29354 .m(1)
29355 .n(4)
29356 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029357 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029358 }
29359 }
29360
// k swept over 2..9, and for each k every (m, n) with 1 <= n <= 4 and 1 <= m <= 1; iterations(1) is set for each combination.
29361 TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_gt_1_subtile) {
29362 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029363 for (uint32_t n = 1; n <= 4; n++) {
29364 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029365 GemmMicrokernelTester()
29366 .mr(1)
29367 .nr(4)
29368 .kr(1)
29369 .sr(1)
29370 .m(m)
29371 .n(n)
29372 .k(k)
29373 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029374 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029375 }
29376 }
29377 }
29378 }
29379
// n = 5..7 (above nr = 4, below 2 * nr) combined with k in {1, 3, 5}; m = mr = 1.
29380 TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4) {
29381 for (uint32_t n = 5; n < 8; n++) {
29382 for (size_t k = 1; k <= 5; k += 2) {
29383 GemmMicrokernelTester()
29384 .mr(1)
29385 .nr(4)
29386 .kr(1)
29387 .sr(1)
29388 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029389 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029390 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029391 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029392 }
29393 }
29394 }
29395
// n = 5..7 (above nr = 4) combined with k in {1, 3, 5}, with cn_stride(7); m = mr = 1.
29396 TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_strided_cn) {
29397 for (uint32_t n = 5; n < 8; n++) {
29398 for (size_t k = 1; k <= 5; k += 2) {
29399 GemmMicrokernelTester()
29400 .mr(1)
29401 .nr(4)
29402 .kr(1)
29403 .sr(1)
29404 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029405 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029406 .k(k)
29407 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029408 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029409 }
29410 }
29411 }
29412
// n = 5..7 (above nr = 4), k in {1, 3, 5}, and m swept over 1..1 (single pass since mr = 1); iterations(1) per combination.
29413 TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_subtile) {
29414 for (uint32_t n = 5; n < 8; n++) {
29415 for (size_t k = 1; k <= 5; k += 2) {
29416 for (uint32_t m = 1; m <= 1; m++) {
29417 GemmMicrokernelTester()
29418 .mr(1)
29419 .nr(4)
29420 .kr(1)
29421 .sr(1)
29422 .m(m)
29423 .n(n)
29424 .k(k)
29425 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029426 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029427 }
29428 }
29429 }
29430 }
29431
// n in {8, 12} (multiples of nr = 4) combined with k in {1, 3, 5}; m = mr = 1.
29432 TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4) {
29433 for (uint32_t n = 8; n <= 12; n += 4) {
29434 for (size_t k = 1; k <= 5; k += 2) {
29435 GemmMicrokernelTester()
29436 .mr(1)
29437 .nr(4)
29438 .kr(1)
29439 .sr(1)
29440 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029441 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029442 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029443 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029444 }
29445 }
29446 }
29447
// n in {8, 12} (multiples of nr = 4) combined with k in {1, 3, 5}, with cn_stride(7); m = mr = 1.
29448 TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_strided_cn) {
29449 for (uint32_t n = 8; n <= 12; n += 4) {
29450 for (size_t k = 1; k <= 5; k += 2) {
29451 GemmMicrokernelTester()
29452 .mr(1)
29453 .nr(4)
29454 .kr(1)
29455 .sr(1)
29456 .m(1)
29457 .n(n)
29458 .k(k)
29459 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029460 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029461 }
29462 }
29463 }
29464
// n in {8, 12}, k in {1, 3, 5}, and m swept over 1..1 (single pass since mr = 1); iterations(1) per combination.
29465 TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_subtile) {
29466 for (uint32_t n = 8; n <= 12; n += 4) {
29467 for (size_t k = 1; k <= 5; k += 2) {
29468 for (uint32_t m = 1; m <= 1; m++) {
29469 GemmMicrokernelTester()
29470 .mr(1)
29471 .nr(4)
29472 .kr(1)
29473 .sr(1)
29474 .m(m)
29475 .n(n)
29476 .k(k)
29477 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029478 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029479 }
29480 }
29481 }
29482 }
29483
// m = mr = 1, n = nr = 4, k in {1, 3, 5} with ks(3).
// NOTE(review): ks is presumably the kernel-size setting of GemmMicrokernelTester -- confirm in the tester.
29484 TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, small_kernel) {
29485 for (size_t k = 1; k <= 5; k += 2) {
29486 GemmMicrokernelTester()
29487 .mr(1)
29488 .nr(4)
29489 .kr(1)
29490 .sr(1)
29491 .m(1)
29492 .n(4)
29493 .k(k)
29494 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080029495 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029496 }
29497 }
29498
// ks(3) with k in {1, 3, 5} and every (m, n) with 1 <= n <= 4 and 1 <= m <= 1; iterations(1) per combination.
29499 TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, small_kernel_subtile) {
29500 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029501 for (uint32_t n = 1; n <= 4; n++) {
29502 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029503 GemmMicrokernelTester()
29504 .mr(1)
29505 .nr(4)
29506 .kr(1)
29507 .sr(1)
29508 .m(m)
29509 .n(n)
29510 .k(k)
29511 .ks(3)
29512 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029513 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029514 }
29515 }
29516 }
29517 }
29518
// ks(3) with n = 5..7 (above nr = 4) and k in {1, 3, 5}; m = mr = 1.
29519 TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_small_kernel) {
29520 for (uint32_t n = 5; n < 8; n++) {
29521 for (size_t k = 1; k <= 5; k += 2) {
29522 GemmMicrokernelTester()
29523 .mr(1)
29524 .nr(4)
29525 .kr(1)
29526 .sr(1)
29527 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029528 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029529 .k(k)
29530 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080029531 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029532 }
29533 }
29534 }
29535
// ks(3) with n in {8, 12} (multiples of nr = 4) and k in {1, 3, 5}; m = mr = 1.
29536 TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_small_kernel) {
29537 for (uint32_t n = 8; n <= 12; n += 4) {
29538 for (size_t k = 1; k <= 5; k += 2) {
29539 GemmMicrokernelTester()
29540 .mr(1)
29541 .nr(4)
29542 .kr(1)
29543 .sr(1)
29544 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029545 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029546 .k(k)
29547 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080029548 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029549 }
29550 }
29551 }
29552
// cm_stride(7) with k in {1, 3, 5} and every (m, n) with 1 <= n <= 4 and 1 <= m <= 1; iterations(1) per combination.
29553 TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cm_subtile) {
29554 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029555 for (uint32_t n = 1; n <= 4; n++) {
29556 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029557 GemmMicrokernelTester()
29558 .mr(1)
29559 .nr(4)
29560 .kr(1)
29561 .sr(1)
29562 .m(m)
29563 .n(n)
29564 .k(k)
29565 .cm_stride(7)
29566 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029567 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029568 }
29569 }
29570 }
29571 }
29572
// m = mr = 1, n = nr = 4, k in {1, 3, 5} with ks(3) and a_offset(7).
29573 TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, a_offset) {
29574 for (size_t k = 1; k <= 5; k += 2) {
29575 GemmMicrokernelTester()
29576 .mr(1)
29577 .nr(4)
29578 .kr(1)
29579 .sr(1)
29580 .m(1)
29581 .n(4)
29582 .k(k)
29583 .ks(3)
29584 .a_offset(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029585 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029586 }
29587 }
29588
// Same settings as the a_offset case (ks(3), a_offset(7), k in {1, 3, 5}) plus zero_index(mz) for mz in [0, 1), i.e. only mz = 0 since mr = 1.
29589 TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029590 for (size_t k = 1; k <= 5; k += 2) {
29591 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029592 GemmMicrokernelTester()
29593 .mr(1)
29594 .nr(4)
29595 .kr(1)
29596 .sr(1)
29597 .m(1)
29598 .n(4)
29599 .k(k)
29600 .ks(3)
29601 .a_offset(7)
29602 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080029603 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029604 }
29605 }
29606 }
29607
// m = 1, n = 4, k = 1 with qmin(128).
// NOTE(review): presumably raises the lower output clamp to mid-range -- confirm against GemmMicrokernelTester.
29608 TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, qmin) {
29609 GemmMicrokernelTester()
29610 .mr(1)
29611 .nr(4)
29612 .kr(1)
29613 .sr(1)
29614 .m(1)
29615 .n(4)
29616 .k(1)
29617 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080029618 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029619 }
29620
// m = 1, n = 4, k = 1 with qmax(128).
// NOTE(review): presumably lowers the upper output clamp to mid-range -- confirm against GemmMicrokernelTester.
29621 TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, qmax) {
29622 GemmMicrokernelTester()
29623 .mr(1)
29624 .nr(4)
29625 .kr(1)
29626 .sr(1)
29627 .m(1)
29628 .n(4)
29629 .k(1)
29630 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080029631 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029632 }
29633
// m = 1, n = 4, k = 1 with cm_stride(7), a value larger than nr = 4.
29634 TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cm) {
29635 GemmMicrokernelTester()
29636 .mr(1)
29637 .nr(4)
29638 .kr(1)
29639 .sr(1)
29640 .m(1)
29641 .n(4)
29642 .k(1)
29643 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029644 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029645 }
29646
// m = mr = 1, n = nr = 4 with a_zero_point set to 0; k takes the values 1, 3, 5.
29647 TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, no_a_zero_point) {
29648 for (size_t k = 1; k <= 5; k += 2) {
29649 GemmMicrokernelTester()
29650 .mr(1)
29651 .nr(4)
29652 .kr(1)
29653 .sr(1)
29654 .m(1)
29655 .n(4)
29656 .k(k)
29657 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080029658 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029659 }
29660 }
29661
// m = mr = 1, n = nr = 4 with b_zero_point set to 0; k takes the values 1, 3, 5.
29662 TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, no_b_zero_point) {
29663 for (size_t k = 1; k <= 5; k += 2) {
29664 GemmMicrokernelTester()
29665 .mr(1)
29666 .nr(4)
29667 .kr(1)
29668 .sr(1)
29669 .m(1)
29670 .n(4)
29671 .k(k)
29672 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080029673 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029674 }
29675 }
29676
// m = mr = 1, n = nr = 4 with both a_zero_point and b_zero_point set to 0; k takes the values 1, 3, 5.
29677 TEST(QU8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, no_zero_point) {
29678 for (size_t k = 1; k <= 5; k += 2) {
29679 GemmMicrokernelTester()
29680 .mr(1)
29681 .nr(4)
29682 .kr(1)
29683 .sr(1)
29684 .m(1)
29685 .n(4)
29686 .k(k)
29687 .a_zero_point(0)
29688 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080029689 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029690 }
29691 }
29692#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
29693
29694
29695#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Simplest 2x4 case: m = mr = 2, n = nr = 4, k = 1; no other tester setting is changed.
29696 TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1) {
29697 GemmMicrokernelTester()
29698 .mr(2)
29699 .nr(4)
29700 .kr(1)
29701 .sr(1)
29702 .m(2)
29703 .n(4)
29704 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029705 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029706 }
29707
// m = 2, n = 4, k = 1 with cn_stride(7), a value larger than nr = 4.
29708 TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cn) {
29709 GemmMicrokernelTester()
29710 .mr(2)
29711 .nr(4)
29712 .kr(1)
29713 .sr(1)
29714 .m(2)
29715 .n(4)
29716 .k(1)
29717 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029718 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029719 }
29720
// k = 1 for every (m, n) with 1 <= n <= 4 and 1 <= m <= 2; iterations(1) is set for each pair.
29721 TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029722 for (uint32_t n = 1; n <= 4; n++) {
29723 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029724 GemmMicrokernelTester()
29725 .mr(2)
29726 .nr(4)
29727 .kr(1)
29728 .sr(1)
29729 .m(m)
29730 .n(n)
29731 .k(1)
29732 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029733 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029734 }
29735 }
29736 }
29737
// k = 1, n = nr = 4; m is swept over 1..2 (up to mr), with iterations(1).
29738 TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile_m) {
29739 for (uint32_t m = 1; m <= 2; m++) {
29740 GemmMicrokernelTester()
29741 .mr(2)
29742 .nr(4)
29743 .kr(1)
29744 .sr(1)
29745 .m(m)
29746 .n(4)
29747 .k(1)
29748 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029749 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029750 }
29751 }
29752
// k = 1, m = mr = 2; n is swept over 1..4 (up to nr), with iterations(1).
29753 TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile_n) {
29754 for (uint32_t n = 1; n <= 4; n++) {
29755 GemmMicrokernelTester()
29756 .mr(2)
29757 .nr(4)
29758 .kr(1)
29759 .sr(1)
29760 .m(2)
29761 .n(n)
29762 .k(1)
29763 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029764 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029765 }
29766 }
29767
// m = mr = 2, n = nr = 4; k is swept over 2..9.
29768 TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_gt_1) {
29769 for (size_t k = 2; k < 10; k++) {
29770 GemmMicrokernelTester()
29771 .mr(2)
29772 .nr(4)
29773 .kr(1)
29774 .sr(1)
29775 .m(2)
29776 .n(4)
29777 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029778 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029779 }
29780 }
29781
// k swept over 2..9, and for each k every (m, n) with 1 <= n <= 4 and 1 <= m <= 2; iterations(1) is set for each combination.
29782 TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_gt_1_subtile) {
29783 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029784 for (uint32_t n = 1; n <= 4; n++) {
29785 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029786 GemmMicrokernelTester()
29787 .mr(2)
29788 .nr(4)
29789 .kr(1)
29790 .sr(1)
29791 .m(m)
29792 .n(n)
29793 .k(k)
29794 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029795 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029796 }
29797 }
29798 }
29799 }
29800
// n = 5..7 (above nr = 4, below 2 * nr) combined with k in {1, 3, 5}; m = mr = 2.
29801 TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4) {
29802 for (uint32_t n = 5; n < 8; n++) {
29803 for (size_t k = 1; k <= 5; k += 2) {
29804 GemmMicrokernelTester()
29805 .mr(2)
29806 .nr(4)
29807 .kr(1)
29808 .sr(1)
29809 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029810 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029811 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029812 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029813 }
29814 }
29815 }
29816
// n = 5..7 (above nr = 4) combined with k in {1, 3, 5}, with cn_stride(7); m = mr = 2.
29817 TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_strided_cn) {
29818 for (uint32_t n = 5; n < 8; n++) {
29819 for (size_t k = 1; k <= 5; k += 2) {
29820 GemmMicrokernelTester()
29821 .mr(2)
29822 .nr(4)
29823 .kr(1)
29824 .sr(1)
29825 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029826 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029827 .k(k)
29828 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029829 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029830 }
29831 }
29832 }
29833
// n = 5..7 (above nr = 4), k in {1, 3, 5}, and m swept over 1..2; iterations(1) per combination.
29834 TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_subtile) {
29835 for (uint32_t n = 5; n < 8; n++) {
29836 for (size_t k = 1; k <= 5; k += 2) {
29837 for (uint32_t m = 1; m <= 2; m++) {
29838 GemmMicrokernelTester()
29839 .mr(2)
29840 .nr(4)
29841 .kr(1)
29842 .sr(1)
29843 .m(m)
29844 .n(n)
29845 .k(k)
29846 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029847 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029848 }
29849 }
29850 }
29851 }
29852
// n in {8, 12} (multiples of nr = 4) combined with k in {1, 3, 5}; m = mr = 2.
29853 TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4) {
29854 for (uint32_t n = 8; n <= 12; n += 4) {
29855 for (size_t k = 1; k <= 5; k += 2) {
29856 GemmMicrokernelTester()
29857 .mr(2)
29858 .nr(4)
29859 .kr(1)
29860 .sr(1)
29861 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029862 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029863 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080029864 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029865 }
29866 }
29867 }
29868
// n in {8, 12} (multiples of nr = 4) combined with k in {1, 3, 5}, with cn_stride(7); m = mr = 2.
29869 TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_strided_cn) {
29870 for (uint32_t n = 8; n <= 12; n += 4) {
29871 for (size_t k = 1; k <= 5; k += 2) {
29872 GemmMicrokernelTester()
29873 .mr(2)
29874 .nr(4)
29875 .kr(1)
29876 .sr(1)
29877 .m(2)
29878 .n(n)
29879 .k(k)
29880 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080029881 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029882 }
29883 }
29884 }
29885
// n in {8, 12}, k in {1, 3, 5}, and m swept over 1..2; iterations(1) per combination.
29886 TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_subtile) {
29887 for (uint32_t n = 8; n <= 12; n += 4) {
29888 for (size_t k = 1; k <= 5; k += 2) {
29889 for (uint32_t m = 1; m <= 2; m++) {
29890 GemmMicrokernelTester()
29891 .mr(2)
29892 .nr(4)
29893 .kr(1)
29894 .sr(1)
29895 .m(m)
29896 .n(n)
29897 .k(k)
29898 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029899 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029900 }
29901 }
29902 }
29903 }
29904
// m = mr = 2, n = nr = 4, k in {1, 3, 5} with ks(3).
// NOTE(review): ks is presumably the kernel-size setting of GemmMicrokernelTester -- confirm in the tester.
29905 TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, small_kernel) {
29906 for (size_t k = 1; k <= 5; k += 2) {
29907 GemmMicrokernelTester()
29908 .mr(2)
29909 .nr(4)
29910 .kr(1)
29911 .sr(1)
29912 .m(2)
29913 .n(4)
29914 .k(k)
29915 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080029916 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029917 }
29918 }
29919
// ks(3) with k in {1, 3, 5} and every (m, n) with 1 <= n <= 4 and 1 <= m <= 2; iterations(1) per combination.
29920 TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, small_kernel_subtile) {
29921 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029922 for (uint32_t n = 1; n <= 4; n++) {
29923 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029924 GemmMicrokernelTester()
29925 .mr(2)
29926 .nr(4)
29927 .kr(1)
29928 .sr(1)
29929 .m(m)
29930 .n(n)
29931 .k(k)
29932 .ks(3)
29933 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029934 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029935 }
29936 }
29937 }
29938 }
29939
// ks(3) with n = 5..7 (above nr = 4) and k in {1, 3, 5}; m = mr = 2.
29940 TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_small_kernel) {
29941 for (uint32_t n = 5; n < 8; n++) {
29942 for (size_t k = 1; k <= 5; k += 2) {
29943 GemmMicrokernelTester()
29944 .mr(2)
29945 .nr(4)
29946 .kr(1)
29947 .sr(1)
29948 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029949 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029950 .k(k)
29951 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080029952 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029953 }
29954 }
29955 }
29956
// ks(3) with n in {8, 12} (multiples of nr = 4) and k in {1, 3, 5}; m = mr = 2.
29957 TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_small_kernel) {
29958 for (uint32_t n = 8; n <= 12; n += 4) {
29959 for (size_t k = 1; k <= 5; k += 2) {
29960 GemmMicrokernelTester()
29961 .mr(2)
29962 .nr(4)
29963 .kr(1)
29964 .sr(1)
29965 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080029966 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029967 .k(k)
29968 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080029969 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029970 }
29971 }
29972 }
29973
// cm_stride(7) with k in {1, 3, 5} and every (m, n) with 1 <= n <= 4 and 1 <= m <= 2; iterations(1) per combination.
29974 TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cm_subtile) {
29975 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080029976 for (uint32_t n = 1; n <= 4; n++) {
29977 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029978 GemmMicrokernelTester()
29979 .mr(2)
29980 .nr(4)
29981 .kr(1)
29982 .sr(1)
29983 .m(m)
29984 .n(n)
29985 .k(k)
29986 .cm_stride(7)
29987 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080029988 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080029989 }
29990 }
29991 }
29992 }
29993
// m = mr = 2, n = nr = 4, k in {1, 3, 5} with ks(3) and a_offset(13).
29994 TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, a_offset) {
29995 for (size_t k = 1; k <= 5; k += 2) {
29996 GemmMicrokernelTester()
29997 .mr(2)
29998 .nr(4)
29999 .kr(1)
30000 .sr(1)
30001 .m(2)
30002 .n(4)
30003 .k(k)
30004 .ks(3)
30005 .a_offset(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080030006 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030007 }
30008 }
30009
// Same settings as the a_offset case (ks(3), a_offset(13), k in {1, 3, 5}) plus zero_index(mz) for mz in [0, 2), i.e. each index below mr.
30010 TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030011 for (size_t k = 1; k <= 5; k += 2) {
30012 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030013 GemmMicrokernelTester()
30014 .mr(2)
30015 .nr(4)
30016 .kr(1)
30017 .sr(1)
30018 .m(2)
30019 .n(4)
30020 .k(k)
30021 .ks(3)
30022 .a_offset(13)
30023 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080030024 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030025 }
30026 }
30027 }
30028
// m = 2, n = 4, k = 1 with qmin(128).
// NOTE(review): presumably raises the lower output clamp to mid-range -- confirm against GemmMicrokernelTester.
30029 TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, qmin) {
30030 GemmMicrokernelTester()
30031 .mr(2)
30032 .nr(4)
30033 .kr(1)
30034 .sr(1)
30035 .m(2)
30036 .n(4)
30037 .k(1)
30038 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080030039 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030040 }
30041
// m = 2, n = 4, k = 1 with qmax(128).
// NOTE(review): presumably lowers the upper output clamp to mid-range -- confirm against GemmMicrokernelTester.
30042 TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, qmax) {
30043 GemmMicrokernelTester()
30044 .mr(2)
30045 .nr(4)
30046 .kr(1)
30047 .sr(1)
30048 .m(2)
30049 .n(4)
30050 .k(1)
30051 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080030052 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030053 }
30054
// m = 2, n = 4, k = 1 with cm_stride(7), a value larger than nr = 4.
30055 TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cm) {
30056 GemmMicrokernelTester()
30057 .mr(2)
30058 .nr(4)
30059 .kr(1)
30060 .sr(1)
30061 .m(2)
30062 .n(4)
30063 .k(1)
30064 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080030065 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030066 }
30067
// m = mr = 2, n = nr = 4 with a_zero_point set to 0; k takes the values 1, 3, 5.
30068 TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, no_a_zero_point) {
30069 for (size_t k = 1; k <= 5; k += 2) {
30070 GemmMicrokernelTester()
30071 .mr(2)
30072 .nr(4)
30073 .kr(1)
30074 .sr(1)
30075 .m(2)
30076 .n(4)
30077 .k(k)
30078 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080030079 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030080 }
30081 }
30082
// m = mr = 2, n = nr = 4 with b_zero_point set to 0; k takes the values 1, 3, 5.
30083 TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, no_b_zero_point) {
30084 for (size_t k = 1; k <= 5; k += 2) {
30085 GemmMicrokernelTester()
30086 .mr(2)
30087 .nr(4)
30088 .kr(1)
30089 .sr(1)
30090 .m(2)
30091 .n(4)
30092 .k(k)
30093 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080030094 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030095 }
30096 }
30097
// m = mr = 2, n = nr = 4 with both a_zero_point and b_zero_point set to 0; k takes the values 1, 3, 5.
30098 TEST(QU8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, no_zero_point) {
30099 for (size_t k = 1; k <= 5; k += 2) {
30100 GemmMicrokernelTester()
30101 .mr(2)
30102 .nr(4)
30103 .kr(1)
30104 .sr(1)
30105 .m(2)
30106 .n(4)
30107 .k(k)
30108 .a_zero_point(0)
30109 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080030110 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030111 }
30112 }
30113#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
30114
30115
30116#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Simplest 4x4 case: m = mr = 4, n = nr = 4, k = 1; no other tester setting is changed.
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030117 TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1) {
30118 GemmMicrokernelTester()
30119 .mr(4)
30120 .nr(4)
30121 .kr(1)
30122 .sr(1)
30123 .m(4)
30124 .n(4)
30125 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030126 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030127 }
30128
// m = 4, n = 4, k = 1 with cn_stride(7), a value larger than nr = 4.
30129 TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cn) {
30130 GemmMicrokernelTester()
30131 .mr(4)
30132 .nr(4)
30133 .kr(1)
30134 .sr(1)
30135 .m(4)
30136 .n(4)
30137 .k(1)
30138 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080030139 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030140 }
30141
// k = 1 for every (m, n) with 1 <= n <= 4 and 1 <= m <= 4; iterations(1) is set for each pair.
30142 TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030143 for (uint32_t n = 1; n <= 4; n++) {
30144 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030145 GemmMicrokernelTester()
30146 .mr(4)
30147 .nr(4)
30148 .kr(1)
30149 .sr(1)
30150 .m(m)
30151 .n(n)
30152 .k(1)
30153 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030154 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030155 }
30156 }
30157 }
30158
// k = 1, n = nr = 4; m is swept over 1..4 (up to mr), with iterations(1).
30159 TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile_m) {
30160 for (uint32_t m = 1; m <= 4; m++) {
30161 GemmMicrokernelTester()
30162 .mr(4)
30163 .nr(4)
30164 .kr(1)
30165 .sr(1)
30166 .m(m)
30167 .n(4)
30168 .k(1)
30169 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030170 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030171 }
30172 }
30173
// k = 1, m = mr = 4; n is swept over 1..4 (up to nr), with iterations(1).
30174 TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile_n) {
30175 for (uint32_t n = 1; n <= 4; n++) {
30176 GemmMicrokernelTester()
30177 .mr(4)
30178 .nr(4)
30179 .kr(1)
30180 .sr(1)
30181 .m(4)
30182 .n(n)
30183 .k(1)
30184 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030185 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030186 }
30187 }
30188
// m = mr = 4, n = nr = 4; k is swept over 2..9.
30189 TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_gt_1) {
30190 for (size_t k = 2; k < 10; k++) {
30191 GemmMicrokernelTester()
30192 .mr(4)
30193 .nr(4)
30194 .kr(1)
30195 .sr(1)
30196 .m(4)
30197 .n(4)
30198 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030199 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030200 }
30201 }
30202
30203 TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_gt_1_subtile) {
30204 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030205 for (uint32_t n = 1; n <= 4; n++) {
30206 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030207 GemmMicrokernelTester()
30208 .mr(4)
30209 .nr(4)
30210 .kr(1)
30211 .sr(1)
30212 .m(m)
30213 .n(n)
30214 .k(k)
30215 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030216 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030217 }
30218 }
30219 }
30220 }
30221
30222 TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4) {
30223 for (uint32_t n = 5; n < 8; n++) {
30224 for (size_t k = 1; k <= 5; k += 2) {
30225 GemmMicrokernelTester()
30226 .mr(4)
30227 .nr(4)
30228 .kr(1)
30229 .sr(1)
30230 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030231 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030232 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030233 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030234 }
30235 }
30236 }
30237
30238 TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_strided_cn) {
30239 for (uint32_t n = 5; n < 8; n++) {
30240 for (size_t k = 1; k <= 5; k += 2) {
30241 GemmMicrokernelTester()
30242 .mr(4)
30243 .nr(4)
30244 .kr(1)
30245 .sr(1)
30246 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030247 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030248 .k(k)
30249 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080030250 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030251 }
30252 }
30253 }
30254
30255 TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_subtile) {
30256 for (uint32_t n = 5; n < 8; n++) {
30257 for (size_t k = 1; k <= 5; k += 2) {
30258 for (uint32_t m = 1; m <= 4; m++) {
30259 GemmMicrokernelTester()
30260 .mr(4)
30261 .nr(4)
30262 .kr(1)
30263 .sr(1)
30264 .m(m)
30265 .n(n)
30266 .k(k)
30267 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030268 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030269 }
30270 }
30271 }
30272 }
30273
30274 TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4) {
30275 for (uint32_t n = 8; n <= 12; n += 4) {
30276 for (size_t k = 1; k <= 5; k += 2) {
30277 GemmMicrokernelTester()
30278 .mr(4)
30279 .nr(4)
30280 .kr(1)
30281 .sr(1)
30282 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030283 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030284 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030285 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030286 }
30287 }
30288 }
30289
30290 TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_strided_cn) {
30291 for (uint32_t n = 8; n <= 12; n += 4) {
30292 for (size_t k = 1; k <= 5; k += 2) {
30293 GemmMicrokernelTester()
30294 .mr(4)
30295 .nr(4)
30296 .kr(1)
30297 .sr(1)
30298 .m(4)
30299 .n(n)
30300 .k(k)
30301 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080030302 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030303 }
30304 }
30305 }
30306
30307 TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_subtile) {
30308 for (uint32_t n = 8; n <= 12; n += 4) {
30309 for (size_t k = 1; k <= 5; k += 2) {
30310 for (uint32_t m = 1; m <= 4; m++) {
30311 GemmMicrokernelTester()
30312 .mr(4)
30313 .nr(4)
30314 .kr(1)
30315 .sr(1)
30316 .m(m)
30317 .n(n)
30318 .k(k)
30319 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030320 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030321 }
30322 }
30323 }
30324 }
30325
30326 TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, small_kernel) {
30327 for (size_t k = 1; k <= 5; k += 2) {
30328 GemmMicrokernelTester()
30329 .mr(4)
30330 .nr(4)
30331 .kr(1)
30332 .sr(1)
30333 .m(4)
30334 .n(4)
30335 .k(k)
30336 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080030337 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030338 }
30339 }
30340
30341 TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, small_kernel_subtile) {
30342 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030343 for (uint32_t n = 1; n <= 4; n++) {
30344 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030345 GemmMicrokernelTester()
30346 .mr(4)
30347 .nr(4)
30348 .kr(1)
30349 .sr(1)
30350 .m(m)
30351 .n(n)
30352 .k(k)
30353 .ks(3)
30354 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030355 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030356 }
30357 }
30358 }
30359 }
30360
30361 TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_small_kernel) {
30362 for (uint32_t n = 5; n < 8; n++) {
30363 for (size_t k = 1; k <= 5; k += 2) {
30364 GemmMicrokernelTester()
30365 .mr(4)
30366 .nr(4)
30367 .kr(1)
30368 .sr(1)
30369 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030370 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030371 .k(k)
30372 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080030373 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030374 }
30375 }
30376 }
30377
30378 TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_small_kernel) {
30379 for (uint32_t n = 8; n <= 12; n += 4) {
30380 for (size_t k = 1; k <= 5; k += 2) {
30381 GemmMicrokernelTester()
30382 .mr(4)
30383 .nr(4)
30384 .kr(1)
30385 .sr(1)
30386 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030387 .n(n)
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030388 .k(k)
30389 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080030390 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030391 }
30392 }
30393 }
30394
30395 TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cm_subtile) {
30396 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030397 for (uint32_t n = 1; n <= 4; n++) {
30398 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030399 GemmMicrokernelTester()
30400 .mr(4)
30401 .nr(4)
30402 .kr(1)
30403 .sr(1)
30404 .m(m)
30405 .n(n)
30406 .k(k)
30407 .cm_stride(7)
30408 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030409 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030410 }
30411 }
30412 }
30413 }
30414
30415 TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, a_offset) {
30416 for (size_t k = 1; k <= 5; k += 2) {
30417 GemmMicrokernelTester()
30418 .mr(4)
30419 .nr(4)
30420 .kr(1)
30421 .sr(1)
30422 .m(4)
30423 .n(4)
30424 .k(k)
30425 .ks(3)
30426 .a_offset(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080030427 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030428 }
30429 }
30430
30431 TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030432 for (size_t k = 1; k <= 5; k += 2) {
30433 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030434 GemmMicrokernelTester()
30435 .mr(4)
30436 .nr(4)
30437 .kr(1)
30438 .sr(1)
30439 .m(4)
30440 .n(4)
30441 .k(k)
30442 .ks(3)
30443 .a_offset(23)
30444 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080030445 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030446 }
30447 }
30448 }
30449
30450 TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, qmin) {
30451 GemmMicrokernelTester()
30452 .mr(4)
30453 .nr(4)
30454 .kr(1)
30455 .sr(1)
30456 .m(4)
30457 .n(4)
30458 .k(1)
30459 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080030460 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030461 }
30462
30463 TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, qmax) {
30464 GemmMicrokernelTester()
30465 .mr(4)
30466 .nr(4)
30467 .kr(1)
30468 .sr(1)
30469 .m(4)
30470 .n(4)
30471 .k(1)
30472 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080030473 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030474 }
30475
30476 TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cm) {
30477 GemmMicrokernelTester()
30478 .mr(4)
30479 .nr(4)
30480 .kr(1)
30481 .sr(1)
30482 .m(4)
30483 .n(4)
30484 .k(1)
30485 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080030486 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030487 }
30488
30489 TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, no_a_zero_point) {
30490 for (size_t k = 1; k <= 5; k += 2) {
30491 GemmMicrokernelTester()
30492 .mr(4)
30493 .nr(4)
30494 .kr(1)
30495 .sr(1)
30496 .m(4)
30497 .n(4)
30498 .k(k)
30499 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080030500 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030501 }
30502 }
30503
30504 TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, no_b_zero_point) {
30505 for (size_t k = 1; k <= 5; k += 2) {
30506 GemmMicrokernelTester()
30507 .mr(4)
30508 .nr(4)
30509 .kr(1)
30510 .sr(1)
30511 .m(4)
30512 .n(4)
30513 .k(k)
30514 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080030515 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030516 }
30517 }
30518
30519 TEST(QU8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, no_zero_point) {
30520 for (size_t k = 1; k <= 5; k += 2) {
30521 GemmMicrokernelTester()
30522 .mr(4)
30523 .nr(4)
30524 .kr(1)
30525 .sr(1)
30526 .m(4)
30527 .n(4)
30528 .k(k)
30529 .a_zero_point(0)
30530 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080030531 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan7c1115f2022-01-04 17:18:41 -080030532 }
30533 }
30534#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
30535
30536
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030537TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030538 GemmMicrokernelTester()
30539 .mr(1)
30540 .nr(2)
30541 .kr(1)
30542 .sr(1)
30543 .m(1)
30544 .n(2)
30545 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030546 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030547}
30548
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030549TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, strided_cn) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030550 GemmMicrokernelTester()
30551 .mr(1)
30552 .nr(2)
30553 .kr(1)
30554 .sr(1)
30555 .m(1)
30556 .n(2)
30557 .k(1)
30558 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080030559 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030560}
30561
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030562TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030563 for (uint32_t n = 1; n <= 2; n++) {
30564 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030565 GemmMicrokernelTester()
30566 .mr(1)
30567 .nr(2)
30568 .kr(1)
30569 .sr(1)
30570 .m(m)
30571 .n(n)
30572 .k(1)
30573 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030574 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030575 }
30576 }
30577}
30578
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030579TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030580 for (uint32_t m = 1; m <= 1; m++) {
30581 GemmMicrokernelTester()
30582 .mr(1)
30583 .nr(2)
30584 .kr(1)
30585 .sr(1)
30586 .m(m)
30587 .n(2)
30588 .k(1)
30589 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030590 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030591 }
30592}
30593
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030594TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030595 for (uint32_t n = 1; n <= 2; n++) {
30596 GemmMicrokernelTester()
30597 .mr(1)
30598 .nr(2)
30599 .kr(1)
30600 .sr(1)
30601 .m(1)
30602 .n(n)
30603 .k(1)
30604 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030605 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030606 }
30607}
30608
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030609TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_gt_1) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030610 for (size_t k = 2; k < 10; k++) {
30611 GemmMicrokernelTester()
30612 .mr(1)
30613 .nr(2)
30614 .kr(1)
30615 .sr(1)
30616 .m(1)
30617 .n(2)
30618 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030619 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030620 }
30621}
30622
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030623TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_gt_1_subtile) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030624 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030625 for (uint32_t n = 1; n <= 2; n++) {
30626 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030627 GemmMicrokernelTester()
30628 .mr(1)
30629 .nr(2)
30630 .kr(1)
30631 .sr(1)
30632 .m(m)
30633 .n(n)
30634 .k(k)
30635 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030636 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030637 }
30638 }
30639 }
30640}
30641
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030642TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030643 for (uint32_t n = 3; n < 4; n++) {
30644 for (size_t k = 1; k <= 5; k += 2) {
30645 GemmMicrokernelTester()
30646 .mr(1)
30647 .nr(2)
30648 .kr(1)
30649 .sr(1)
30650 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030651 .n(n)
Marat Dukhan927d4742021-07-15 13:42:49 -070030652 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030653 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030654 }
30655 }
30656}
30657
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030658TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030659 for (uint32_t n = 3; n < 4; n++) {
30660 for (size_t k = 1; k <= 5; k += 2) {
30661 GemmMicrokernelTester()
30662 .mr(1)
30663 .nr(2)
30664 .kr(1)
30665 .sr(1)
30666 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030667 .n(n)
Marat Dukhan927d4742021-07-15 13:42:49 -070030668 .k(k)
30669 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080030670 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030671 }
30672 }
30673}
30674
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030675TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2_subtile) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030676 for (uint32_t n = 3; n < 4; n++) {
30677 for (size_t k = 1; k <= 5; k += 2) {
30678 for (uint32_t m = 1; m <= 1; m++) {
30679 GemmMicrokernelTester()
30680 .mr(1)
30681 .nr(2)
30682 .kr(1)
30683 .sr(1)
30684 .m(m)
30685 .n(n)
30686 .k(k)
30687 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030688 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030689 }
30690 }
30691 }
30692}
30693
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030694TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030695 for (uint32_t n = 4; n <= 6; n += 2) {
30696 for (size_t k = 1; k <= 5; k += 2) {
30697 GemmMicrokernelTester()
30698 .mr(1)
30699 .nr(2)
30700 .kr(1)
30701 .sr(1)
30702 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030703 .n(n)
Marat Dukhan927d4742021-07-15 13:42:49 -070030704 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080030705 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030706 }
30707 }
30708}
30709
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030710TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030711 for (uint32_t n = 4; n <= 6; n += 2) {
30712 for (size_t k = 1; k <= 5; k += 2) {
30713 GemmMicrokernelTester()
30714 .mr(1)
30715 .nr(2)
30716 .kr(1)
30717 .sr(1)
30718 .m(1)
30719 .n(n)
30720 .k(k)
30721 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080030722 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030723 }
30724 }
30725}
30726
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030727TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2_subtile) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030728 for (uint32_t n = 4; n <= 6; n += 2) {
30729 for (size_t k = 1; k <= 5; k += 2) {
30730 for (uint32_t m = 1; m <= 1; m++) {
30731 GemmMicrokernelTester()
30732 .mr(1)
30733 .nr(2)
30734 .kr(1)
30735 .sr(1)
30736 .m(m)
30737 .n(n)
30738 .k(k)
30739 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030740 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030741 }
30742 }
30743 }
30744}
30745
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030746TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, small_kernel) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030747 for (size_t k = 1; k <= 5; k += 2) {
30748 GemmMicrokernelTester()
30749 .mr(1)
30750 .nr(2)
30751 .kr(1)
30752 .sr(1)
30753 .m(1)
30754 .n(2)
30755 .k(k)
30756 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080030757 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030758 }
30759}
30760
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030761TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, small_kernel_subtile) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030762 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030763 for (uint32_t n = 1; n <= 2; n++) {
30764 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030765 GemmMicrokernelTester()
30766 .mr(1)
30767 .nr(2)
30768 .kr(1)
30769 .sr(1)
30770 .m(m)
30771 .n(n)
30772 .k(k)
30773 .ks(3)
30774 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030775 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030776 }
30777 }
30778 }
30779}
30780
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030781TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2_small_kernel) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030782 for (uint32_t n = 3; n < 4; n++) {
30783 for (size_t k = 1; k <= 5; k += 2) {
30784 GemmMicrokernelTester()
30785 .mr(1)
30786 .nr(2)
30787 .kr(1)
30788 .sr(1)
30789 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030790 .n(n)
Marat Dukhan927d4742021-07-15 13:42:49 -070030791 .k(k)
30792 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080030793 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030794 }
30795 }
30796}
30797
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030798TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2_small_kernel) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030799 for (uint32_t n = 4; n <= 6; n += 2) {
30800 for (size_t k = 1; k <= 5; k += 2) {
30801 GemmMicrokernelTester()
30802 .mr(1)
30803 .nr(2)
30804 .kr(1)
30805 .sr(1)
30806 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080030807 .n(n)
Marat Dukhan927d4742021-07-15 13:42:49 -070030808 .k(k)
30809 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080030810 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030811 }
30812 }
30813}
30814
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030815TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, strided_cm_subtile) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030816 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030817 for (uint32_t n = 1; n <= 2; n++) {
30818 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030819 GemmMicrokernelTester()
30820 .mr(1)
30821 .nr(2)
30822 .kr(1)
30823 .sr(1)
30824 .m(m)
30825 .n(n)
30826 .k(k)
30827 .cm_stride(5)
30828 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030829 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030830 }
30831 }
30832 }
30833}
30834
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030835TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, a_offset) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030836 for (size_t k = 1; k <= 5; k += 2) {
30837 GemmMicrokernelTester()
30838 .mr(1)
30839 .nr(2)
30840 .kr(1)
30841 .sr(1)
30842 .m(1)
30843 .n(2)
30844 .k(k)
30845 .ks(3)
30846 .a_offset(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080030847 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030848 }
30849}
30850
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030851TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030852 for (size_t k = 1; k <= 5; k += 2) {
30853 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030854 GemmMicrokernelTester()
30855 .mr(1)
30856 .nr(2)
30857 .kr(1)
30858 .sr(1)
30859 .m(1)
30860 .n(2)
30861 .k(k)
30862 .ks(3)
30863 .a_offset(7)
30864 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080030865 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030866 }
30867 }
30868}
30869
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030870TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, qmin) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030871 GemmMicrokernelTester()
30872 .mr(1)
30873 .nr(2)
30874 .kr(1)
30875 .sr(1)
30876 .m(1)
30877 .n(2)
30878 .k(1)
30879 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080030880 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030881}
30882
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030883TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, qmax) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030884 GemmMicrokernelTester()
30885 .mr(1)
30886 .nr(2)
30887 .kr(1)
30888 .sr(1)
30889 .m(1)
30890 .n(2)
30891 .k(1)
30892 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080030893 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030894}
30895
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030896TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, strided_cm) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030897 GemmMicrokernelTester()
30898 .mr(1)
30899 .nr(2)
30900 .kr(1)
30901 .sr(1)
30902 .m(1)
30903 .n(2)
30904 .k(1)
30905 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080030906 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030907}
30908
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030909TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, no_a_zero_point) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030910 for (size_t k = 1; k <= 5; k += 2) {
30911 GemmMicrokernelTester()
30912 .mr(1)
30913 .nr(2)
30914 .kr(1)
30915 .sr(1)
30916 .m(1)
30917 .n(2)
30918 .k(k)
30919 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080030920 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030921 }
30922}
30923
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030924TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, no_b_zero_point) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030925 for (size_t k = 1; k <= 5; k += 2) {
30926 GemmMicrokernelTester()
30927 .mr(1)
30928 .nr(2)
30929 .kr(1)
30930 .sr(1)
30931 .m(1)
30932 .n(2)
30933 .k(k)
30934 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080030935 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030936 }
30937}
30938
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030939TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, no_zero_point) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030940 for (size_t k = 1; k <= 5; k += 2) {
30941 GemmMicrokernelTester()
30942 .mr(1)
30943 .nr(2)
30944 .kr(1)
30945 .sr(1)
30946 .m(1)
30947 .n(2)
30948 .k(k)
30949 .a_zero_point(0)
30950 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080030951 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030952 }
30953}
30954
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030955TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030956 GemmMicrokernelTester()
30957 .mr(2)
30958 .nr(2)
30959 .kr(1)
30960 .sr(1)
30961 .m(2)
30962 .n(2)
30963 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030964 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030965}
30966
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030967TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cn) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030968 GemmMicrokernelTester()
30969 .mr(2)
30970 .nr(2)
30971 .kr(1)
30972 .sr(1)
30973 .m(2)
30974 .n(2)
30975 .k(1)
30976 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080030977 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030978}
30979
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030980TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080030981 for (uint32_t n = 1; n <= 2; n++) {
30982 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030983 GemmMicrokernelTester()
30984 .mr(2)
30985 .nr(2)
30986 .kr(1)
30987 .sr(1)
30988 .m(m)
30989 .n(n)
30990 .k(1)
30991 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080030992 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070030993 }
30994 }
30995}
30996
Marat Dukhan2ac722e2022-01-04 01:54:20 -080030997TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
Marat Dukhan927d4742021-07-15 13:42:49 -070030998 for (uint32_t m = 1; m <= 2; m++) {
30999 GemmMicrokernelTester()
31000 .mr(2)
31001 .nr(2)
31002 .kr(1)
31003 .sr(1)
31004 .m(m)
31005 .n(2)
31006 .k(1)
31007 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031008 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031009 }
31010}
31011
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031012TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031013 for (uint32_t n = 1; n <= 2; n++) {
31014 GemmMicrokernelTester()
31015 .mr(2)
31016 .nr(2)
31017 .kr(1)
31018 .sr(1)
31019 .m(2)
31020 .n(n)
31021 .k(1)
31022 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031023 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031024 }
31025}
31026
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031027TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_gt_1) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031028 for (size_t k = 2; k < 10; k++) {
31029 GemmMicrokernelTester()
31030 .mr(2)
31031 .nr(2)
31032 .kr(1)
31033 .sr(1)
31034 .m(2)
31035 .n(2)
31036 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031037 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031038 }
31039}
31040
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031041TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_gt_1_subtile) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031042 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031043 for (uint32_t n = 1; n <= 2; n++) {
31044 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031045 GemmMicrokernelTester()
31046 .mr(2)
31047 .nr(2)
31048 .kr(1)
31049 .sr(1)
31050 .m(m)
31051 .n(n)
31052 .k(k)
31053 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031054 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031055 }
31056 }
31057 }
31058}
31059
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031060TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031061 for (uint32_t n = 3; n < 4; n++) {
31062 for (size_t k = 1; k <= 5; k += 2) {
31063 GemmMicrokernelTester()
31064 .mr(2)
31065 .nr(2)
31066 .kr(1)
31067 .sr(1)
31068 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031069 .n(n)
Marat Dukhan927d4742021-07-15 13:42:49 -070031070 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031071 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031072 }
31073 }
31074}
31075
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031076TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031077 for (uint32_t n = 3; n < 4; n++) {
31078 for (size_t k = 1; k <= 5; k += 2) {
31079 GemmMicrokernelTester()
31080 .mr(2)
31081 .nr(2)
31082 .kr(1)
31083 .sr(1)
31084 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031085 .n(n)
Marat Dukhan927d4742021-07-15 13:42:49 -070031086 .k(k)
31087 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080031088 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031089 }
31090 }
31091}
31092
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031093TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_subtile) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031094 for (uint32_t n = 3; n < 4; n++) {
31095 for (size_t k = 1; k <= 5; k += 2) {
31096 for (uint32_t m = 1; m <= 2; m++) {
31097 GemmMicrokernelTester()
31098 .mr(2)
31099 .nr(2)
31100 .kr(1)
31101 .sr(1)
31102 .m(m)
31103 .n(n)
31104 .k(k)
31105 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031106 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031107 }
31108 }
31109 }
31110}
31111
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031112TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031113 for (uint32_t n = 4; n <= 6; n += 2) {
31114 for (size_t k = 1; k <= 5; k += 2) {
31115 GemmMicrokernelTester()
31116 .mr(2)
31117 .nr(2)
31118 .kr(1)
31119 .sr(1)
31120 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031121 .n(n)
Marat Dukhan927d4742021-07-15 13:42:49 -070031122 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031123 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031124 }
31125 }
31126}
31127
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031128TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031129 for (uint32_t n = 4; n <= 6; n += 2) {
31130 for (size_t k = 1; k <= 5; k += 2) {
31131 GemmMicrokernelTester()
31132 .mr(2)
31133 .nr(2)
31134 .kr(1)
31135 .sr(1)
31136 .m(2)
31137 .n(n)
31138 .k(k)
31139 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080031140 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031141 }
31142 }
31143}
31144
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031145TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_subtile) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031146 for (uint32_t n = 4; n <= 6; n += 2) {
31147 for (size_t k = 1; k <= 5; k += 2) {
31148 for (uint32_t m = 1; m <= 2; m++) {
31149 GemmMicrokernelTester()
31150 .mr(2)
31151 .nr(2)
31152 .kr(1)
31153 .sr(1)
31154 .m(m)
31155 .n(n)
31156 .k(k)
31157 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031158 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031159 }
31160 }
31161 }
31162}
31163
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031164TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, small_kernel) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031165 for (size_t k = 1; k <= 5; k += 2) {
31166 GemmMicrokernelTester()
31167 .mr(2)
31168 .nr(2)
31169 .kr(1)
31170 .sr(1)
31171 .m(2)
31172 .n(2)
31173 .k(k)
31174 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080031175 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031176 }
31177}
31178
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031179TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, small_kernel_subtile) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031180 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031181 for (uint32_t n = 1; n <= 2; n++) {
31182 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031183 GemmMicrokernelTester()
31184 .mr(2)
31185 .nr(2)
31186 .kr(1)
31187 .sr(1)
31188 .m(m)
31189 .n(n)
31190 .k(k)
31191 .ks(3)
31192 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031193 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031194 }
31195 }
31196 }
31197}
31198
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031199TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_small_kernel) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031200 for (uint32_t n = 3; n < 4; n++) {
31201 for (size_t k = 1; k <= 5; k += 2) {
31202 GemmMicrokernelTester()
31203 .mr(2)
31204 .nr(2)
31205 .kr(1)
31206 .sr(1)
31207 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031208 .n(n)
Marat Dukhan927d4742021-07-15 13:42:49 -070031209 .k(k)
31210 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080031211 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031212 }
31213 }
31214}
31215
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031216TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_small_kernel) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031217 for (uint32_t n = 4; n <= 6; n += 2) {
31218 for (size_t k = 1; k <= 5; k += 2) {
31219 GemmMicrokernelTester()
31220 .mr(2)
31221 .nr(2)
31222 .kr(1)
31223 .sr(1)
31224 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031225 .n(n)
Marat Dukhan927d4742021-07-15 13:42:49 -070031226 .k(k)
31227 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080031228 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031229 }
31230 }
31231}
31232
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031233TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cm_subtile) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031234 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031235 for (uint32_t n = 1; n <= 2; n++) {
31236 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031237 GemmMicrokernelTester()
31238 .mr(2)
31239 .nr(2)
31240 .kr(1)
31241 .sr(1)
31242 .m(m)
31243 .n(n)
31244 .k(k)
31245 .cm_stride(5)
31246 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031247 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031248 }
31249 }
31250 }
31251}
31252
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031253TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, a_offset) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031254 for (size_t k = 1; k <= 5; k += 2) {
31255 GemmMicrokernelTester()
31256 .mr(2)
31257 .nr(2)
31258 .kr(1)
31259 .sr(1)
31260 .m(2)
31261 .n(2)
31262 .k(k)
31263 .ks(3)
31264 .a_offset(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080031265 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031266 }
31267}
31268
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031269TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031270 for (size_t k = 1; k <= 5; k += 2) {
31271 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031272 GemmMicrokernelTester()
31273 .mr(2)
31274 .nr(2)
31275 .kr(1)
31276 .sr(1)
31277 .m(2)
31278 .n(2)
31279 .k(k)
31280 .ks(3)
31281 .a_offset(13)
31282 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080031283 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031284 }
31285 }
31286}
31287
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031288TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, qmin) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031289 GemmMicrokernelTester()
31290 .mr(2)
31291 .nr(2)
31292 .kr(1)
31293 .sr(1)
31294 .m(2)
31295 .n(2)
31296 .k(1)
31297 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080031298 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031299}
31300
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031301TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, qmax) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031302 GemmMicrokernelTester()
31303 .mr(2)
31304 .nr(2)
31305 .kr(1)
31306 .sr(1)
31307 .m(2)
31308 .n(2)
31309 .k(1)
31310 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080031311 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031312}
31313
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031314TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cm) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031315 GemmMicrokernelTester()
31316 .mr(2)
31317 .nr(2)
31318 .kr(1)
31319 .sr(1)
31320 .m(2)
31321 .n(2)
31322 .k(1)
31323 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080031324 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031325}
31326
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031327TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, no_a_zero_point) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031328 for (size_t k = 1; k <= 5; k += 2) {
31329 GemmMicrokernelTester()
31330 .mr(2)
31331 .nr(2)
31332 .kr(1)
31333 .sr(1)
31334 .m(2)
31335 .n(2)
31336 .k(k)
31337 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080031338 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031339 }
31340}
31341
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031342TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, no_b_zero_point) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031343 for (size_t k = 1; k <= 5; k += 2) {
31344 GemmMicrokernelTester()
31345 .mr(2)
31346 .nr(2)
31347 .kr(1)
31348 .sr(1)
31349 .m(2)
31350 .n(2)
31351 .k(k)
31352 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080031353 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031354 }
31355}
31356
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031357TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, no_zero_point) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031358 for (size_t k = 1; k <= 5; k += 2) {
31359 GemmMicrokernelTester()
31360 .mr(2)
31361 .nr(2)
31362 .kr(1)
31363 .sr(1)
31364 .m(2)
31365 .n(2)
31366 .k(k)
31367 .a_zero_point(0)
31368 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080031369 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031370 }
31371}
31372
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031373TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031374 GemmMicrokernelTester()
31375 .mr(1)
31376 .nr(4)
31377 .kr(1)
31378 .sr(1)
31379 .m(1)
31380 .n(4)
31381 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031382 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031383}
31384
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031385TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, strided_cn) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031386 GemmMicrokernelTester()
31387 .mr(1)
31388 .nr(4)
31389 .kr(1)
31390 .sr(1)
31391 .m(1)
31392 .n(4)
31393 .k(1)
31394 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031395 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031396}
31397
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031398TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031399 for (uint32_t n = 1; n <= 4; n++) {
31400 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031401 GemmMicrokernelTester()
31402 .mr(1)
31403 .nr(4)
31404 .kr(1)
31405 .sr(1)
31406 .m(m)
31407 .n(n)
31408 .k(1)
31409 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031410 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031411 }
31412 }
31413}
31414
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031415TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031416 for (uint32_t m = 1; m <= 1; m++) {
31417 GemmMicrokernelTester()
31418 .mr(1)
31419 .nr(4)
31420 .kr(1)
31421 .sr(1)
31422 .m(m)
31423 .n(4)
31424 .k(1)
31425 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031426 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031427 }
31428}
31429
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031430TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031431 for (uint32_t n = 1; n <= 4; n++) {
31432 GemmMicrokernelTester()
31433 .mr(1)
31434 .nr(4)
31435 .kr(1)
31436 .sr(1)
31437 .m(1)
31438 .n(n)
31439 .k(1)
31440 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031441 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031442 }
31443}
31444
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031445TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_gt_1) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031446 for (size_t k = 2; k < 10; k++) {
31447 GemmMicrokernelTester()
31448 .mr(1)
31449 .nr(4)
31450 .kr(1)
31451 .sr(1)
31452 .m(1)
31453 .n(4)
31454 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031455 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031456 }
31457}
31458
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031459TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_gt_1_subtile) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031460 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031461 for (uint32_t n = 1; n <= 4; n++) {
31462 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031463 GemmMicrokernelTester()
31464 .mr(1)
31465 .nr(4)
31466 .kr(1)
31467 .sr(1)
31468 .m(m)
31469 .n(n)
31470 .k(k)
31471 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031472 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031473 }
31474 }
31475 }
31476}
31477
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031478TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031479 for (uint32_t n = 5; n < 8; n++) {
31480 for (size_t k = 1; k <= 5; k += 2) {
31481 GemmMicrokernelTester()
31482 .mr(1)
31483 .nr(4)
31484 .kr(1)
31485 .sr(1)
31486 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031487 .n(n)
Marat Dukhan927d4742021-07-15 13:42:49 -070031488 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031489 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031490 }
31491 }
31492}
31493
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031494TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031495 for (uint32_t n = 5; n < 8; n++) {
31496 for (size_t k = 1; k <= 5; k += 2) {
31497 GemmMicrokernelTester()
31498 .mr(1)
31499 .nr(4)
31500 .kr(1)
31501 .sr(1)
31502 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031503 .n(n)
Marat Dukhan927d4742021-07-15 13:42:49 -070031504 .k(k)
31505 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031506 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031507 }
31508 }
31509}
31510
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031511TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4_subtile) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031512 for (uint32_t n = 5; n < 8; n++) {
31513 for (size_t k = 1; k <= 5; k += 2) {
31514 for (uint32_t m = 1; m <= 1; m++) {
31515 GemmMicrokernelTester()
31516 .mr(1)
31517 .nr(4)
31518 .kr(1)
31519 .sr(1)
31520 .m(m)
31521 .n(n)
31522 .k(k)
31523 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031524 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031525 }
31526 }
31527 }
31528}
31529
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031530TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031531 for (uint32_t n = 8; n <= 12; n += 4) {
31532 for (size_t k = 1; k <= 5; k += 2) {
31533 GemmMicrokernelTester()
31534 .mr(1)
31535 .nr(4)
31536 .kr(1)
31537 .sr(1)
31538 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031539 .n(n)
Marat Dukhan927d4742021-07-15 13:42:49 -070031540 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031541 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031542 }
31543 }
31544}
31545
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031546TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031547 for (uint32_t n = 8; n <= 12; n += 4) {
31548 for (size_t k = 1; k <= 5; k += 2) {
31549 GemmMicrokernelTester()
31550 .mr(1)
31551 .nr(4)
31552 .kr(1)
31553 .sr(1)
31554 .m(1)
31555 .n(n)
31556 .k(k)
31557 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031558 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031559 }
31560 }
31561}
31562
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031563TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4_subtile) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031564 for (uint32_t n = 8; n <= 12; n += 4) {
31565 for (size_t k = 1; k <= 5; k += 2) {
31566 for (uint32_t m = 1; m <= 1; m++) {
31567 GemmMicrokernelTester()
31568 .mr(1)
31569 .nr(4)
31570 .kr(1)
31571 .sr(1)
31572 .m(m)
31573 .n(n)
31574 .k(k)
31575 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031576 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031577 }
31578 }
31579 }
31580}
31581
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031582TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, small_kernel) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031583 for (size_t k = 1; k <= 5; k += 2) {
31584 GemmMicrokernelTester()
31585 .mr(1)
31586 .nr(4)
31587 .kr(1)
31588 .sr(1)
31589 .m(1)
31590 .n(4)
31591 .k(k)
31592 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080031593 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031594 }
31595}
31596
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031597TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, small_kernel_subtile) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031598 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031599 for (uint32_t n = 1; n <= 4; n++) {
31600 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031601 GemmMicrokernelTester()
31602 .mr(1)
31603 .nr(4)
31604 .kr(1)
31605 .sr(1)
31606 .m(m)
31607 .n(n)
31608 .k(k)
31609 .ks(3)
31610 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031611 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031612 }
31613 }
31614 }
31615}
31616
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031617TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4_small_kernel) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031618 for (uint32_t n = 5; n < 8; n++) {
31619 for (size_t k = 1; k <= 5; k += 2) {
31620 GemmMicrokernelTester()
31621 .mr(1)
31622 .nr(4)
31623 .kr(1)
31624 .sr(1)
31625 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031626 .n(n)
Marat Dukhan927d4742021-07-15 13:42:49 -070031627 .k(k)
31628 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080031629 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031630 }
31631 }
31632}
31633
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031634TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4_small_kernel) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031635 for (uint32_t n = 8; n <= 12; n += 4) {
31636 for (size_t k = 1; k <= 5; k += 2) {
31637 GemmMicrokernelTester()
31638 .mr(1)
31639 .nr(4)
31640 .kr(1)
31641 .sr(1)
31642 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031643 .n(n)
Marat Dukhan927d4742021-07-15 13:42:49 -070031644 .k(k)
31645 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080031646 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031647 }
31648 }
31649}
31650
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031651TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, strided_cm_subtile) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031652 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031653 for (uint32_t n = 1; n <= 4; n++) {
31654 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031655 GemmMicrokernelTester()
31656 .mr(1)
31657 .nr(4)
31658 .kr(1)
31659 .sr(1)
31660 .m(m)
31661 .n(n)
31662 .k(k)
31663 .cm_stride(7)
31664 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031665 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031666 }
31667 }
31668 }
31669}
31670
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031671TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, a_offset) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031672 for (size_t k = 1; k <= 5; k += 2) {
31673 GemmMicrokernelTester()
31674 .mr(1)
31675 .nr(4)
31676 .kr(1)
31677 .sr(1)
31678 .m(1)
31679 .n(4)
31680 .k(k)
31681 .ks(3)
31682 .a_offset(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031683 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031684 }
31685}
31686
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031687TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031688 for (size_t k = 1; k <= 5; k += 2) {
31689 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031690 GemmMicrokernelTester()
31691 .mr(1)
31692 .nr(4)
31693 .kr(1)
31694 .sr(1)
31695 .m(1)
31696 .n(4)
31697 .k(k)
31698 .ks(3)
31699 .a_offset(7)
31700 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080031701 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031702 }
31703 }
31704}
31705
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031706TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, qmin) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031707 GemmMicrokernelTester()
31708 .mr(1)
31709 .nr(4)
31710 .kr(1)
31711 .sr(1)
31712 .m(1)
31713 .n(4)
31714 .k(1)
31715 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080031716 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031717}
31718
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031719TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, qmax) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031720 GemmMicrokernelTester()
31721 .mr(1)
31722 .nr(4)
31723 .kr(1)
31724 .sr(1)
31725 .m(1)
31726 .n(4)
31727 .k(1)
31728 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080031729 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031730}
31731
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031732TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, strided_cm) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031733 GemmMicrokernelTester()
31734 .mr(1)
31735 .nr(4)
31736 .kr(1)
31737 .sr(1)
31738 .m(1)
31739 .n(4)
31740 .k(1)
31741 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031742 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031743}
31744
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031745TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, no_a_zero_point) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031746 for (size_t k = 1; k <= 5; k += 2) {
31747 GemmMicrokernelTester()
31748 .mr(1)
31749 .nr(4)
31750 .kr(1)
31751 .sr(1)
31752 .m(1)
31753 .n(4)
31754 .k(k)
31755 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080031756 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031757 }
31758}
31759
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031760TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, no_b_zero_point) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031761 for (size_t k = 1; k <= 5; k += 2) {
31762 GemmMicrokernelTester()
31763 .mr(1)
31764 .nr(4)
31765 .kr(1)
31766 .sr(1)
31767 .m(1)
31768 .n(4)
31769 .k(k)
31770 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080031771 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031772 }
31773}
31774
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031775TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, no_zero_point) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031776 for (size_t k = 1; k <= 5; k += 2) {
31777 GemmMicrokernelTester()
31778 .mr(1)
31779 .nr(4)
31780 .kr(1)
31781 .sr(1)
31782 .m(1)
31783 .n(4)
31784 .k(k)
31785 .a_zero_point(0)
31786 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080031787 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031788 }
31789}
31790
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031791TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031792 GemmMicrokernelTester()
31793 .mr(2)
31794 .nr(4)
31795 .kr(1)
31796 .sr(1)
31797 .m(2)
31798 .n(4)
31799 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031800 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031801}
31802
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031803TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, strided_cn) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031804 GemmMicrokernelTester()
31805 .mr(2)
31806 .nr(4)
31807 .kr(1)
31808 .sr(1)
31809 .m(2)
31810 .n(4)
31811 .k(1)
31812 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031813 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031814}
31815
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031816TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031817 for (uint32_t n = 1; n <= 4; n++) {
31818 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031819 GemmMicrokernelTester()
31820 .mr(2)
31821 .nr(4)
31822 .kr(1)
31823 .sr(1)
31824 .m(m)
31825 .n(n)
31826 .k(1)
31827 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031828 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031829 }
31830 }
31831}
31832
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031833TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031834 for (uint32_t m = 1; m <= 2; m++) {
31835 GemmMicrokernelTester()
31836 .mr(2)
31837 .nr(4)
31838 .kr(1)
31839 .sr(1)
31840 .m(m)
31841 .n(4)
31842 .k(1)
31843 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031844 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031845 }
31846}
31847
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031848TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031849 for (uint32_t n = 1; n <= 4; n++) {
31850 GemmMicrokernelTester()
31851 .mr(2)
31852 .nr(4)
31853 .kr(1)
31854 .sr(1)
31855 .m(2)
31856 .n(n)
31857 .k(1)
31858 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031859 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031860 }
31861}
31862
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031863TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_gt_1) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031864 for (size_t k = 2; k < 10; k++) {
31865 GemmMicrokernelTester()
31866 .mr(2)
31867 .nr(4)
31868 .kr(1)
31869 .sr(1)
31870 .m(2)
31871 .n(4)
31872 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031873 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031874 }
31875}
31876
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031877TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_gt_1_subtile) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031878 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080031879 for (uint32_t n = 1; n <= 4; n++) {
31880 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031881 GemmMicrokernelTester()
31882 .mr(2)
31883 .nr(4)
31884 .kr(1)
31885 .sr(1)
31886 .m(m)
31887 .n(n)
31888 .k(k)
31889 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031890 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031891 }
31892 }
31893 }
31894}
31895
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031896TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031897 for (uint32_t n = 5; n < 8; n++) {
31898 for (size_t k = 1; k <= 5; k += 2) {
31899 GemmMicrokernelTester()
31900 .mr(2)
31901 .nr(4)
31902 .kr(1)
31903 .sr(1)
31904 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031905 .n(n)
Marat Dukhan927d4742021-07-15 13:42:49 -070031906 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031907 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031908 }
31909 }
31910}
31911
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031912TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031913 for (uint32_t n = 5; n < 8; n++) {
31914 for (size_t k = 1; k <= 5; k += 2) {
31915 GemmMicrokernelTester()
31916 .mr(2)
31917 .nr(4)
31918 .kr(1)
31919 .sr(1)
31920 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031921 .n(n)
Marat Dukhan927d4742021-07-15 13:42:49 -070031922 .k(k)
31923 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031924 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031925 }
31926 }
31927}
31928
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031929TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4_subtile) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031930 for (uint32_t n = 5; n < 8; n++) {
31931 for (size_t k = 1; k <= 5; k += 2) {
31932 for (uint32_t m = 1; m <= 2; m++) {
31933 GemmMicrokernelTester()
31934 .mr(2)
31935 .nr(4)
31936 .kr(1)
31937 .sr(1)
31938 .m(m)
31939 .n(n)
31940 .k(k)
31941 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031942 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031943 }
31944 }
31945 }
31946}
31947
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031948TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031949 for (uint32_t n = 8; n <= 12; n += 4) {
31950 for (size_t k = 1; k <= 5; k += 2) {
31951 GemmMicrokernelTester()
31952 .mr(2)
31953 .nr(4)
31954 .kr(1)
31955 .sr(1)
31956 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080031957 .n(n)
Marat Dukhan927d4742021-07-15 13:42:49 -070031958 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080031959 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031960 }
31961 }
31962}
31963
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031964TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031965 for (uint32_t n = 8; n <= 12; n += 4) {
31966 for (size_t k = 1; k <= 5; k += 2) {
31967 GemmMicrokernelTester()
31968 .mr(2)
31969 .nr(4)
31970 .kr(1)
31971 .sr(1)
31972 .m(2)
31973 .n(n)
31974 .k(k)
31975 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080031976 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031977 }
31978 }
31979}
31980
Marat Dukhan2ac722e2022-01-04 01:54:20 -080031981TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4_subtile) {
Marat Dukhan927d4742021-07-15 13:42:49 -070031982 for (uint32_t n = 8; n <= 12; n += 4) {
31983 for (size_t k = 1; k <= 5; k += 2) {
31984 for (uint32_t m = 1; m <= 2; m++) {
31985 GemmMicrokernelTester()
31986 .mr(2)
31987 .nr(4)
31988 .kr(1)
31989 .sr(1)
31990 .m(m)
31991 .n(n)
31992 .k(k)
31993 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080031994 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070031995 }
31996 }
31997 }
31998}
31999
Marat Dukhan2ac722e2022-01-04 01:54:20 -080032000TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, small_kernel) {
Marat Dukhan927d4742021-07-15 13:42:49 -070032001 for (size_t k = 1; k <= 5; k += 2) {
32002 GemmMicrokernelTester()
32003 .mr(2)
32004 .nr(4)
32005 .kr(1)
32006 .sr(1)
32007 .m(2)
32008 .n(4)
32009 .k(k)
32010 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080032011 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070032012 }
32013}
32014
Marat Dukhan2ac722e2022-01-04 01:54:20 -080032015TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, small_kernel_subtile) {
Marat Dukhan927d4742021-07-15 13:42:49 -070032016 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032017 for (uint32_t n = 1; n <= 4; n++) {
32018 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan927d4742021-07-15 13:42:49 -070032019 GemmMicrokernelTester()
32020 .mr(2)
32021 .nr(4)
32022 .kr(1)
32023 .sr(1)
32024 .m(m)
32025 .n(n)
32026 .k(k)
32027 .ks(3)
32028 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032029 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070032030 }
32031 }
32032 }
32033}
32034
Marat Dukhan2ac722e2022-01-04 01:54:20 -080032035TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4_small_kernel) {
Marat Dukhan927d4742021-07-15 13:42:49 -070032036 for (uint32_t n = 5; n < 8; n++) {
32037 for (size_t k = 1; k <= 5; k += 2) {
32038 GemmMicrokernelTester()
32039 .mr(2)
32040 .nr(4)
32041 .kr(1)
32042 .sr(1)
32043 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032044 .n(n)
Marat Dukhan927d4742021-07-15 13:42:49 -070032045 .k(k)
32046 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080032047 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070032048 }
32049 }
32050}
32051
Marat Dukhan2ac722e2022-01-04 01:54:20 -080032052TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4_small_kernel) {
Marat Dukhan927d4742021-07-15 13:42:49 -070032053 for (uint32_t n = 8; n <= 12; n += 4) {
32054 for (size_t k = 1; k <= 5; k += 2) {
32055 GemmMicrokernelTester()
32056 .mr(2)
32057 .nr(4)
32058 .kr(1)
32059 .sr(1)
32060 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032061 .n(n)
Marat Dukhan927d4742021-07-15 13:42:49 -070032062 .k(k)
32063 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080032064 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070032065 }
32066 }
32067}
32068
Marat Dukhan2ac722e2022-01-04 01:54:20 -080032069TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, strided_cm_subtile) {
Marat Dukhan927d4742021-07-15 13:42:49 -070032070 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032071 for (uint32_t n = 1; n <= 4; n++) {
32072 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan927d4742021-07-15 13:42:49 -070032073 GemmMicrokernelTester()
32074 .mr(2)
32075 .nr(4)
32076 .kr(1)
32077 .sr(1)
32078 .m(m)
32079 .n(n)
32080 .k(k)
32081 .cm_stride(7)
32082 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032083 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070032084 }
32085 }
32086 }
32087}
32088
Marat Dukhan2ac722e2022-01-04 01:54:20 -080032089TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, a_offset) {
Marat Dukhan927d4742021-07-15 13:42:49 -070032090 for (size_t k = 1; k <= 5; k += 2) {
32091 GemmMicrokernelTester()
32092 .mr(2)
32093 .nr(4)
32094 .kr(1)
32095 .sr(1)
32096 .m(2)
32097 .n(4)
32098 .k(k)
32099 .ks(3)
32100 .a_offset(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080032101 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070032102 }
32103}
32104
Marat Dukhan2ac722e2022-01-04 01:54:20 -080032105TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032106 for (size_t k = 1; k <= 5; k += 2) {
32107 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan927d4742021-07-15 13:42:49 -070032108 GemmMicrokernelTester()
32109 .mr(2)
32110 .nr(4)
32111 .kr(1)
32112 .sr(1)
32113 .m(2)
32114 .n(4)
32115 .k(k)
32116 .ks(3)
32117 .a_offset(13)
32118 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080032119 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070032120 }
32121 }
32122}
32123
Marat Dukhan2ac722e2022-01-04 01:54:20 -080032124TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, qmin) {
Marat Dukhan927d4742021-07-15 13:42:49 -070032125 GemmMicrokernelTester()
32126 .mr(2)
32127 .nr(4)
32128 .kr(1)
32129 .sr(1)
32130 .m(2)
32131 .n(4)
32132 .k(1)
32133 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080032134 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070032135}
32136
Marat Dukhan2ac722e2022-01-04 01:54:20 -080032137TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, qmax) {
Marat Dukhan927d4742021-07-15 13:42:49 -070032138 GemmMicrokernelTester()
32139 .mr(2)
32140 .nr(4)
32141 .kr(1)
32142 .sr(1)
32143 .m(2)
32144 .n(4)
32145 .k(1)
32146 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080032147 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070032148}
32149
Marat Dukhan2ac722e2022-01-04 01:54:20 -080032150TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, strided_cm) {
Marat Dukhan927d4742021-07-15 13:42:49 -070032151 GemmMicrokernelTester()
32152 .mr(2)
32153 .nr(4)
32154 .kr(1)
32155 .sr(1)
32156 .m(2)
32157 .n(4)
32158 .k(1)
32159 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080032160 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070032161}
32162
Marat Dukhan2ac722e2022-01-04 01:54:20 -080032163TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, no_a_zero_point) {
Marat Dukhan927d4742021-07-15 13:42:49 -070032164 for (size_t k = 1; k <= 5; k += 2) {
32165 GemmMicrokernelTester()
32166 .mr(2)
32167 .nr(4)
32168 .kr(1)
32169 .sr(1)
32170 .m(2)
32171 .n(4)
32172 .k(k)
32173 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080032174 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070032175 }
32176}
32177
Marat Dukhan2ac722e2022-01-04 01:54:20 -080032178TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, no_b_zero_point) {
Marat Dukhan927d4742021-07-15 13:42:49 -070032179 for (size_t k = 1; k <= 5; k += 2) {
32180 GemmMicrokernelTester()
32181 .mr(2)
32182 .nr(4)
32183 .kr(1)
32184 .sr(1)
32185 .m(2)
32186 .n(4)
32187 .k(k)
32188 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080032189 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070032190 }
32191}
32192
Marat Dukhan2ac722e2022-01-04 01:54:20 -080032193TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, no_zero_point) {
Marat Dukhan927d4742021-07-15 13:42:49 -070032194 for (size_t k = 1; k <= 5; k += 2) {
32195 GemmMicrokernelTester()
32196 .mr(2)
32197 .nr(4)
32198 .kr(1)
32199 .sr(1)
32200 .m(2)
32201 .n(4)
32202 .k(k)
32203 .a_zero_point(0)
32204 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080032205 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic, xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan927d4742021-07-15 13:42:49 -070032206 }
32207}
32208
Marat Dukhan272d4d92022-01-04 15:07:14 -080032209TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1) {
32210 GemmMicrokernelTester()
32211 .mr(3)
32212 .nr(2)
32213 .kr(1)
32214 .sr(1)
32215 .m(3)
32216 .n(2)
32217 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032218 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032219}
32220
32221TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cn) {
32222 GemmMicrokernelTester()
32223 .mr(3)
32224 .nr(2)
32225 .kr(1)
32226 .sr(1)
32227 .m(3)
32228 .n(2)
32229 .k(1)
32230 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080032231 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032232}
32233
32234TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032235 for (uint32_t n = 1; n <= 2; n++) {
32236 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080032237 GemmMicrokernelTester()
32238 .mr(3)
32239 .nr(2)
32240 .kr(1)
32241 .sr(1)
32242 .m(m)
32243 .n(n)
32244 .k(1)
32245 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032246 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032247 }
32248 }
32249}
32250
32251TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
32252 for (uint32_t m = 1; m <= 3; m++) {
32253 GemmMicrokernelTester()
32254 .mr(3)
32255 .nr(2)
32256 .kr(1)
32257 .sr(1)
32258 .m(m)
32259 .n(2)
32260 .k(1)
32261 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032262 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032263 }
32264}
32265
32266TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
32267 for (uint32_t n = 1; n <= 2; n++) {
32268 GemmMicrokernelTester()
32269 .mr(3)
32270 .nr(2)
32271 .kr(1)
32272 .sr(1)
32273 .m(3)
32274 .n(n)
32275 .k(1)
32276 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032277 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032278 }
32279}
32280
32281TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1) {
32282 for (size_t k = 2; k < 10; k++) {
32283 GemmMicrokernelTester()
32284 .mr(3)
32285 .nr(2)
32286 .kr(1)
32287 .sr(1)
32288 .m(3)
32289 .n(2)
32290 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032291 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032292 }
32293}
32294
32295TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1_subtile) {
32296 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032297 for (uint32_t n = 1; n <= 2; n++) {
32298 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080032299 GemmMicrokernelTester()
32300 .mr(3)
32301 .nr(2)
32302 .kr(1)
32303 .sr(1)
32304 .m(m)
32305 .n(n)
32306 .k(k)
32307 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032308 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032309 }
32310 }
32311 }
32312}
32313
32314TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2) {
32315 for (uint32_t n = 3; n < 4; n++) {
32316 for (size_t k = 1; k <= 5; k += 2) {
32317 GemmMicrokernelTester()
32318 .mr(3)
32319 .nr(2)
32320 .kr(1)
32321 .sr(1)
32322 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032323 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080032324 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032325 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032326 }
32327 }
32328}
32329
32330TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
32331 for (uint32_t n = 3; n < 4; n++) {
32332 for (size_t k = 1; k <= 5; k += 2) {
32333 GemmMicrokernelTester()
32334 .mr(3)
32335 .nr(2)
32336 .kr(1)
32337 .sr(1)
32338 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032339 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080032340 .k(k)
32341 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080032342 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032343 }
32344 }
32345}
32346
32347TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_subtile) {
32348 for (uint32_t n = 3; n < 4; n++) {
32349 for (size_t k = 1; k <= 5; k += 2) {
32350 for (uint32_t m = 1; m <= 3; m++) {
32351 GemmMicrokernelTester()
32352 .mr(3)
32353 .nr(2)
32354 .kr(1)
32355 .sr(1)
32356 .m(m)
32357 .n(n)
32358 .k(k)
32359 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032360 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032361 }
32362 }
32363 }
32364}
32365
32366TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2) {
32367 for (uint32_t n = 4; n <= 6; n += 2) {
32368 for (size_t k = 1; k <= 5; k += 2) {
32369 GemmMicrokernelTester()
32370 .mr(3)
32371 .nr(2)
32372 .kr(1)
32373 .sr(1)
32374 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032375 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080032376 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032377 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032378 }
32379 }
32380}
32381
32382TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
32383 for (uint32_t n = 4; n <= 6; n += 2) {
32384 for (size_t k = 1; k <= 5; k += 2) {
32385 GemmMicrokernelTester()
32386 .mr(3)
32387 .nr(2)
32388 .kr(1)
32389 .sr(1)
32390 .m(3)
32391 .n(n)
32392 .k(k)
32393 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080032394 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032395 }
32396 }
32397}
32398
32399TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_subtile) {
32400 for (uint32_t n = 4; n <= 6; n += 2) {
32401 for (size_t k = 1; k <= 5; k += 2) {
32402 for (uint32_t m = 1; m <= 3; m++) {
32403 GemmMicrokernelTester()
32404 .mr(3)
32405 .nr(2)
32406 .kr(1)
32407 .sr(1)
32408 .m(m)
32409 .n(n)
32410 .k(k)
32411 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032412 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032413 }
32414 }
32415 }
32416}
32417
32418TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, small_kernel) {
32419 for (size_t k = 1; k <= 5; k += 2) {
32420 GemmMicrokernelTester()
32421 .mr(3)
32422 .nr(2)
32423 .kr(1)
32424 .sr(1)
32425 .m(3)
32426 .n(2)
32427 .k(k)
32428 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080032429 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032430 }
32431}
32432
32433TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, small_kernel_subtile) {
32434 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032435 for (uint32_t n = 1; n <= 2; n++) {
32436 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080032437 GemmMicrokernelTester()
32438 .mr(3)
32439 .nr(2)
32440 .kr(1)
32441 .sr(1)
32442 .m(m)
32443 .n(n)
32444 .k(k)
32445 .ks(3)
32446 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032447 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032448 }
32449 }
32450 }
32451}
32452
32453TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
32454 for (uint32_t n = 3; n < 4; n++) {
32455 for (size_t k = 1; k <= 5; k += 2) {
32456 GemmMicrokernelTester()
32457 .mr(3)
32458 .nr(2)
32459 .kr(1)
32460 .sr(1)
32461 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032462 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080032463 .k(k)
32464 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080032465 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032466 }
32467 }
32468}
32469
32470TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
32471 for (uint32_t n = 4; n <= 6; n += 2) {
32472 for (size_t k = 1; k <= 5; k += 2) {
32473 GemmMicrokernelTester()
32474 .mr(3)
32475 .nr(2)
32476 .kr(1)
32477 .sr(1)
32478 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032479 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080032480 .k(k)
32481 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080032482 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032483 }
32484 }
32485}
32486
32487TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm_subtile) {
32488 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032489 for (uint32_t n = 1; n <= 2; n++) {
32490 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080032491 GemmMicrokernelTester()
32492 .mr(3)
32493 .nr(2)
32494 .kr(1)
32495 .sr(1)
32496 .m(m)
32497 .n(n)
32498 .k(k)
32499 .cm_stride(5)
32500 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032501 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032502 }
32503 }
32504 }
32505}
32506
32507TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, a_offset) {
32508 for (size_t k = 1; k <= 5; k += 2) {
32509 GemmMicrokernelTester()
32510 .mr(3)
32511 .nr(2)
32512 .kr(1)
32513 .sr(1)
32514 .m(3)
32515 .n(2)
32516 .k(k)
32517 .ks(3)
32518 .a_offset(17)
Marat Dukhan50323b82022-01-11 00:12:01 -080032519 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032520 }
32521}
32522
32523TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032524 for (size_t k = 1; k <= 5; k += 2) {
32525 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080032526 GemmMicrokernelTester()
32527 .mr(3)
32528 .nr(2)
32529 .kr(1)
32530 .sr(1)
32531 .m(3)
32532 .n(2)
32533 .k(k)
32534 .ks(3)
32535 .a_offset(17)
32536 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080032537 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032538 }
32539 }
32540}
32541
32542TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmin) {
32543 GemmMicrokernelTester()
32544 .mr(3)
32545 .nr(2)
32546 .kr(1)
32547 .sr(1)
32548 .m(3)
32549 .n(2)
32550 .k(1)
32551 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080032552 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032553}
32554
32555TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmax) {
32556 GemmMicrokernelTester()
32557 .mr(3)
32558 .nr(2)
32559 .kr(1)
32560 .sr(1)
32561 .m(3)
32562 .n(2)
32563 .k(1)
32564 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080032565 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032566}
32567
32568TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm) {
32569 GemmMicrokernelTester()
32570 .mr(3)
32571 .nr(2)
32572 .kr(1)
32573 .sr(1)
32574 .m(3)
32575 .n(2)
32576 .k(1)
32577 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080032578 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032579}
32580
32581TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, no_a_zero_point) {
32582 for (size_t k = 1; k <= 5; k += 2) {
32583 GemmMicrokernelTester()
32584 .mr(3)
32585 .nr(2)
32586 .kr(1)
32587 .sr(1)
32588 .m(3)
32589 .n(2)
32590 .k(k)
32591 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080032592 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032593 }
32594}
32595
32596TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, no_b_zero_point) {
32597 for (size_t k = 1; k <= 5; k += 2) {
32598 GemmMicrokernelTester()
32599 .mr(3)
32600 .nr(2)
32601 .kr(1)
32602 .sr(1)
32603 .m(3)
32604 .n(2)
32605 .k(k)
32606 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080032607 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032608 }
32609}
32610
32611TEST(QU8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, no_zero_point) {
32612 for (size_t k = 1; k <= 5; k += 2) {
32613 GemmMicrokernelTester()
32614 .mr(3)
32615 .nr(2)
32616 .kr(1)
32617 .sr(1)
32618 .m(3)
32619 .n(2)
32620 .k(k)
32621 .a_zero_point(0)
32622 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080032623 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032624 }
32625}
32626
32627TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1) {
32628 GemmMicrokernelTester()
32629 .mr(4)
32630 .nr(2)
32631 .kr(1)
32632 .sr(1)
32633 .m(4)
32634 .n(2)
32635 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032636 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032637}
32638
32639TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cn) {
32640 GemmMicrokernelTester()
32641 .mr(4)
32642 .nr(2)
32643 .kr(1)
32644 .sr(1)
32645 .m(4)
32646 .n(2)
32647 .k(1)
32648 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080032649 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032650}
32651
32652TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032653 for (uint32_t n = 1; n <= 2; n++) {
32654 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080032655 GemmMicrokernelTester()
32656 .mr(4)
32657 .nr(2)
32658 .kr(1)
32659 .sr(1)
32660 .m(m)
32661 .n(n)
32662 .k(1)
32663 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032664 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032665 }
32666 }
32667}
32668
32669TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
32670 for (uint32_t m = 1; m <= 4; m++) {
32671 GemmMicrokernelTester()
32672 .mr(4)
32673 .nr(2)
32674 .kr(1)
32675 .sr(1)
32676 .m(m)
32677 .n(2)
32678 .k(1)
32679 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032680 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032681 }
32682}
32683
32684TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
32685 for (uint32_t n = 1; n <= 2; n++) {
32686 GemmMicrokernelTester()
32687 .mr(4)
32688 .nr(2)
32689 .kr(1)
32690 .sr(1)
32691 .m(4)
32692 .n(n)
32693 .k(1)
32694 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032695 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032696 }
32697}
32698
32699TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1) {
32700 for (size_t k = 2; k < 10; k++) {
32701 GemmMicrokernelTester()
32702 .mr(4)
32703 .nr(2)
32704 .kr(1)
32705 .sr(1)
32706 .m(4)
32707 .n(2)
32708 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032709 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032710 }
32711}
32712
32713TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1_subtile) {
32714 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032715 for (uint32_t n = 1; n <= 2; n++) {
32716 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080032717 GemmMicrokernelTester()
32718 .mr(4)
32719 .nr(2)
32720 .kr(1)
32721 .sr(1)
32722 .m(m)
32723 .n(n)
32724 .k(k)
32725 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032726 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032727 }
32728 }
32729 }
32730}
32731
32732TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2) {
32733 for (uint32_t n = 3; n < 4; n++) {
32734 for (size_t k = 1; k <= 5; k += 2) {
32735 GemmMicrokernelTester()
32736 .mr(4)
32737 .nr(2)
32738 .kr(1)
32739 .sr(1)
32740 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032741 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080032742 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032743 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032744 }
32745 }
32746}
32747
32748TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
32749 for (uint32_t n = 3; n < 4; n++) {
32750 for (size_t k = 1; k <= 5; k += 2) {
32751 GemmMicrokernelTester()
32752 .mr(4)
32753 .nr(2)
32754 .kr(1)
32755 .sr(1)
32756 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032757 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080032758 .k(k)
32759 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080032760 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032761 }
32762 }
32763}
32764
32765TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_subtile) {
32766 for (uint32_t n = 3; n < 4; n++) {
32767 for (size_t k = 1; k <= 5; k += 2) {
32768 for (uint32_t m = 1; m <= 4; m++) {
32769 GemmMicrokernelTester()
32770 .mr(4)
32771 .nr(2)
32772 .kr(1)
32773 .sr(1)
32774 .m(m)
32775 .n(n)
32776 .k(k)
32777 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032778 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032779 }
32780 }
32781 }
32782}
32783
32784TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2) {
32785 for (uint32_t n = 4; n <= 6; n += 2) {
32786 for (size_t k = 1; k <= 5; k += 2) {
32787 GemmMicrokernelTester()
32788 .mr(4)
32789 .nr(2)
32790 .kr(1)
32791 .sr(1)
32792 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032793 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080032794 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080032795 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032796 }
32797 }
32798}
32799
32800TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
32801 for (uint32_t n = 4; n <= 6; n += 2) {
32802 for (size_t k = 1; k <= 5; k += 2) {
32803 GemmMicrokernelTester()
32804 .mr(4)
32805 .nr(2)
32806 .kr(1)
32807 .sr(1)
32808 .m(4)
32809 .n(n)
32810 .k(k)
32811 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080032812 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032813 }
32814 }
32815}
32816
32817TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_subtile) {
32818 for (uint32_t n = 4; n <= 6; n += 2) {
32819 for (size_t k = 1; k <= 5; k += 2) {
32820 for (uint32_t m = 1; m <= 4; m++) {
32821 GemmMicrokernelTester()
32822 .mr(4)
32823 .nr(2)
32824 .kr(1)
32825 .sr(1)
32826 .m(m)
32827 .n(n)
32828 .k(k)
32829 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032830 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032831 }
32832 }
32833 }
32834}
32835
32836TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, small_kernel) {
32837 for (size_t k = 1; k <= 5; k += 2) {
32838 GemmMicrokernelTester()
32839 .mr(4)
32840 .nr(2)
32841 .kr(1)
32842 .sr(1)
32843 .m(4)
32844 .n(2)
32845 .k(k)
32846 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080032847 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032848 }
32849}
32850
32851TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, small_kernel_subtile) {
32852 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032853 for (uint32_t n = 1; n <= 2; n++) {
32854 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080032855 GemmMicrokernelTester()
32856 .mr(4)
32857 .nr(2)
32858 .kr(1)
32859 .sr(1)
32860 .m(m)
32861 .n(n)
32862 .k(k)
32863 .ks(3)
32864 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032865 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032866 }
32867 }
32868 }
32869}
32870
32871TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
32872 for (uint32_t n = 3; n < 4; n++) {
32873 for (size_t k = 1; k <= 5; k += 2) {
32874 GemmMicrokernelTester()
32875 .mr(4)
32876 .nr(2)
32877 .kr(1)
32878 .sr(1)
32879 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032880 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080032881 .k(k)
32882 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080032883 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032884 }
32885 }
32886}
32887
32888TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
32889 for (uint32_t n = 4; n <= 6; n += 2) {
32890 for (size_t k = 1; k <= 5; k += 2) {
32891 GemmMicrokernelTester()
32892 .mr(4)
32893 .nr(2)
32894 .kr(1)
32895 .sr(1)
32896 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080032897 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080032898 .k(k)
32899 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080032900 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032901 }
32902 }
32903}
32904
32905TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm_subtile) {
32906 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032907 for (uint32_t n = 1; n <= 2; n++) {
32908 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080032909 GemmMicrokernelTester()
32910 .mr(4)
32911 .nr(2)
32912 .kr(1)
32913 .sr(1)
32914 .m(m)
32915 .n(n)
32916 .k(k)
32917 .cm_stride(5)
32918 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080032919 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032920 }
32921 }
32922 }
32923}
32924
32925TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, a_offset) {
32926 for (size_t k = 1; k <= 5; k += 2) {
32927 GemmMicrokernelTester()
32928 .mr(4)
32929 .nr(2)
32930 .kr(1)
32931 .sr(1)
32932 .m(4)
32933 .n(2)
32934 .k(k)
32935 .ks(3)
32936 .a_offset(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080032937 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032938 }
32939}
32940
32941TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080032942 for (size_t k = 1; k <= 5; k += 2) {
32943 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080032944 GemmMicrokernelTester()
32945 .mr(4)
32946 .nr(2)
32947 .kr(1)
32948 .sr(1)
32949 .m(4)
32950 .n(2)
32951 .k(k)
32952 .ks(3)
32953 .a_offset(23)
32954 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080032955 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032956 }
32957 }
32958}
32959
32960TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmin) {
32961 GemmMicrokernelTester()
32962 .mr(4)
32963 .nr(2)
32964 .kr(1)
32965 .sr(1)
32966 .m(4)
32967 .n(2)
32968 .k(1)
32969 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080032970 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032971}
32972
32973TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmax) {
32974 GemmMicrokernelTester()
32975 .mr(4)
32976 .nr(2)
32977 .kr(1)
32978 .sr(1)
32979 .m(4)
32980 .n(2)
32981 .k(1)
32982 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080032983 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032984}
32985
32986TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm) {
32987 GemmMicrokernelTester()
32988 .mr(4)
32989 .nr(2)
32990 .kr(1)
32991 .sr(1)
32992 .m(4)
32993 .n(2)
32994 .k(1)
32995 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080032996 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080032997}
32998
32999TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, no_a_zero_point) {
33000 for (size_t k = 1; k <= 5; k += 2) {
33001 GemmMicrokernelTester()
33002 .mr(4)
33003 .nr(2)
33004 .kr(1)
33005 .sr(1)
33006 .m(4)
33007 .n(2)
33008 .k(k)
33009 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080033010 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033011 }
33012}
33013
33014TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, no_b_zero_point) {
33015 for (size_t k = 1; k <= 5; k += 2) {
33016 GemmMicrokernelTester()
33017 .mr(4)
33018 .nr(2)
33019 .kr(1)
33020 .sr(1)
33021 .m(4)
33022 .n(2)
33023 .k(k)
33024 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080033025 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033026 }
33027}
33028
33029TEST(QU8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, no_zero_point) {
33030 for (size_t k = 1; k <= 5; k += 2) {
33031 GemmMicrokernelTester()
33032 .mr(4)
33033 .nr(2)
33034 .kr(1)
33035 .sr(1)
33036 .m(4)
33037 .n(2)
33038 .k(k)
33039 .a_zero_point(0)
33040 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080033041 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033042 }
33043}
33044
Marat Dukhan272d4d92022-01-04 15:07:14 -080033045TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1) {
33046 GemmMicrokernelTester()
33047 .mr(3)
33048 .nr(4)
33049 .kr(1)
33050 .sr(1)
33051 .m(3)
33052 .n(4)
33053 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033054 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033055}
33056
33057TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cn) {
33058 GemmMicrokernelTester()
33059 .mr(3)
33060 .nr(4)
33061 .kr(1)
33062 .sr(1)
33063 .m(3)
33064 .n(4)
33065 .k(1)
33066 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080033067 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033068}
33069
33070TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033071 for (uint32_t n = 1; n <= 4; n++) {
33072 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080033073 GemmMicrokernelTester()
33074 .mr(3)
33075 .nr(4)
33076 .kr(1)
33077 .sr(1)
33078 .m(m)
33079 .n(n)
33080 .k(1)
33081 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033082 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033083 }
33084 }
33085}
33086
33087TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
33088 for (uint32_t m = 1; m <= 3; m++) {
33089 GemmMicrokernelTester()
33090 .mr(3)
33091 .nr(4)
33092 .kr(1)
33093 .sr(1)
33094 .m(m)
33095 .n(4)
33096 .k(1)
33097 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033098 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033099 }
33100}
33101
33102TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
33103 for (uint32_t n = 1; n <= 4; n++) {
33104 GemmMicrokernelTester()
33105 .mr(3)
33106 .nr(4)
33107 .kr(1)
33108 .sr(1)
33109 .m(3)
33110 .n(n)
33111 .k(1)
33112 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033113 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033114 }
33115}
33116
33117TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1) {
33118 for (size_t k = 2; k < 10; k++) {
33119 GemmMicrokernelTester()
33120 .mr(3)
33121 .nr(4)
33122 .kr(1)
33123 .sr(1)
33124 .m(3)
33125 .n(4)
33126 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033127 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033128 }
33129}
33130
33131TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1_subtile) {
33132 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033133 for (uint32_t n = 1; n <= 4; n++) {
33134 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080033135 GemmMicrokernelTester()
33136 .mr(3)
33137 .nr(4)
33138 .kr(1)
33139 .sr(1)
33140 .m(m)
33141 .n(n)
33142 .k(k)
33143 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033144 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033145 }
33146 }
33147 }
33148}
33149
33150TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4) {
33151 for (uint32_t n = 5; n < 8; n++) {
33152 for (size_t k = 1; k <= 5; k += 2) {
33153 GemmMicrokernelTester()
33154 .mr(3)
33155 .nr(4)
33156 .kr(1)
33157 .sr(1)
33158 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033159 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080033160 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033161 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033162 }
33163 }
33164}
33165
33166TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
33167 for (uint32_t n = 5; n < 8; n++) {
33168 for (size_t k = 1; k <= 5; k += 2) {
33169 GemmMicrokernelTester()
33170 .mr(3)
33171 .nr(4)
33172 .kr(1)
33173 .sr(1)
33174 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033175 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080033176 .k(k)
33177 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080033178 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033179 }
33180 }
33181}
33182
33183TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_subtile) {
33184 for (uint32_t n = 5; n < 8; n++) {
33185 for (size_t k = 1; k <= 5; k += 2) {
33186 for (uint32_t m = 1; m <= 3; m++) {
33187 GemmMicrokernelTester()
33188 .mr(3)
33189 .nr(4)
33190 .kr(1)
33191 .sr(1)
33192 .m(m)
33193 .n(n)
33194 .k(k)
33195 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033196 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033197 }
33198 }
33199 }
33200}
33201
33202TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4) {
33203 for (uint32_t n = 8; n <= 12; n += 4) {
33204 for (size_t k = 1; k <= 5; k += 2) {
33205 GemmMicrokernelTester()
33206 .mr(3)
33207 .nr(4)
33208 .kr(1)
33209 .sr(1)
33210 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033211 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080033212 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033213 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033214 }
33215 }
33216}
33217
33218TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
33219 for (uint32_t n = 8; n <= 12; n += 4) {
33220 for (size_t k = 1; k <= 5; k += 2) {
33221 GemmMicrokernelTester()
33222 .mr(3)
33223 .nr(4)
33224 .kr(1)
33225 .sr(1)
33226 .m(3)
33227 .n(n)
33228 .k(k)
33229 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080033230 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033231 }
33232 }
33233}
33234
33235TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_subtile) {
33236 for (uint32_t n = 8; n <= 12; n += 4) {
33237 for (size_t k = 1; k <= 5; k += 2) {
33238 for (uint32_t m = 1; m <= 3; m++) {
33239 GemmMicrokernelTester()
33240 .mr(3)
33241 .nr(4)
33242 .kr(1)
33243 .sr(1)
33244 .m(m)
33245 .n(n)
33246 .k(k)
33247 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033248 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033249 }
33250 }
33251 }
33252}
33253
33254TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, small_kernel) {
33255 for (size_t k = 1; k <= 5; k += 2) {
33256 GemmMicrokernelTester()
33257 .mr(3)
33258 .nr(4)
33259 .kr(1)
33260 .sr(1)
33261 .m(3)
33262 .n(4)
33263 .k(k)
33264 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080033265 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033266 }
33267}
33268
33269TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, small_kernel_subtile) {
33270 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033271 for (uint32_t n = 1; n <= 4; n++) {
33272 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080033273 GemmMicrokernelTester()
33274 .mr(3)
33275 .nr(4)
33276 .kr(1)
33277 .sr(1)
33278 .m(m)
33279 .n(n)
33280 .k(k)
33281 .ks(3)
33282 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033283 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033284 }
33285 }
33286 }
33287}
33288
33289TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
33290 for (uint32_t n = 5; n < 8; n++) {
33291 for (size_t k = 1; k <= 5; k += 2) {
33292 GemmMicrokernelTester()
33293 .mr(3)
33294 .nr(4)
33295 .kr(1)
33296 .sr(1)
33297 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033298 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080033299 .k(k)
33300 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080033301 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033302 }
33303 }
33304}
33305
33306TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
33307 for (uint32_t n = 8; n <= 12; n += 4) {
33308 for (size_t k = 1; k <= 5; k += 2) {
33309 GemmMicrokernelTester()
33310 .mr(3)
33311 .nr(4)
33312 .kr(1)
33313 .sr(1)
33314 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033315 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080033316 .k(k)
33317 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080033318 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033319 }
33320 }
33321}
33322
33323TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm_subtile) {
33324 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033325 for (uint32_t n = 1; n <= 4; n++) {
33326 for (uint32_t m = 1; m <= 3; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080033327 GemmMicrokernelTester()
33328 .mr(3)
33329 .nr(4)
33330 .kr(1)
33331 .sr(1)
33332 .m(m)
33333 .n(n)
33334 .k(k)
33335 .cm_stride(7)
33336 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033337 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033338 }
33339 }
33340 }
33341}
33342
33343TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, a_offset) {
33344 for (size_t k = 1; k <= 5; k += 2) {
33345 GemmMicrokernelTester()
33346 .mr(3)
33347 .nr(4)
33348 .kr(1)
33349 .sr(1)
33350 .m(3)
33351 .n(4)
33352 .k(k)
33353 .ks(3)
33354 .a_offset(17)
Marat Dukhan50323b82022-01-11 00:12:01 -080033355 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033356 }
33357}
33358
33359TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033360 for (size_t k = 1; k <= 5; k += 2) {
33361 for (uint32_t mz = 0; mz < 3; mz++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080033362 GemmMicrokernelTester()
33363 .mr(3)
33364 .nr(4)
33365 .kr(1)
33366 .sr(1)
33367 .m(3)
33368 .n(4)
33369 .k(k)
33370 .ks(3)
33371 .a_offset(17)
33372 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080033373 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033374 }
33375 }
33376}
33377
33378TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmin) {
33379 GemmMicrokernelTester()
33380 .mr(3)
33381 .nr(4)
33382 .kr(1)
33383 .sr(1)
33384 .m(3)
33385 .n(4)
33386 .k(1)
33387 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080033388 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033389}
33390
33391TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmax) {
33392 GemmMicrokernelTester()
33393 .mr(3)
33394 .nr(4)
33395 .kr(1)
33396 .sr(1)
33397 .m(3)
33398 .n(4)
33399 .k(1)
33400 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080033401 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033402}
33403
33404TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm) {
33405 GemmMicrokernelTester()
33406 .mr(3)
33407 .nr(4)
33408 .kr(1)
33409 .sr(1)
33410 .m(3)
33411 .n(4)
33412 .k(1)
33413 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080033414 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033415}
33416
33417TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, no_a_zero_point) {
33418 for (size_t k = 1; k <= 5; k += 2) {
33419 GemmMicrokernelTester()
33420 .mr(3)
33421 .nr(4)
33422 .kr(1)
33423 .sr(1)
33424 .m(3)
33425 .n(4)
33426 .k(k)
33427 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080033428 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033429 }
33430}
33431
33432TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, no_b_zero_point) {
33433 for (size_t k = 1; k <= 5; k += 2) {
33434 GemmMicrokernelTester()
33435 .mr(3)
33436 .nr(4)
33437 .kr(1)
33438 .sr(1)
33439 .m(3)
33440 .n(4)
33441 .k(k)
33442 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080033443 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033444 }
33445}
33446
33447TEST(QU8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, no_zero_point) {
33448 for (size_t k = 1; k <= 5; k += 2) {
33449 GemmMicrokernelTester()
33450 .mr(3)
33451 .nr(4)
33452 .kr(1)
33453 .sr(1)
33454 .m(3)
33455 .n(4)
33456 .k(k)
33457 .a_zero_point(0)
33458 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080033459 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033460 }
33461}
33462
33463TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1) {
33464 GemmMicrokernelTester()
33465 .mr(4)
33466 .nr(4)
33467 .kr(1)
33468 .sr(1)
33469 .m(4)
33470 .n(4)
33471 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033472 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033473}
33474
33475TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cn) {
33476 GemmMicrokernelTester()
33477 .mr(4)
33478 .nr(4)
33479 .kr(1)
33480 .sr(1)
33481 .m(4)
33482 .n(4)
33483 .k(1)
33484 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080033485 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033486}
33487
33488TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033489 for (uint32_t n = 1; n <= 4; n++) {
33490 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080033491 GemmMicrokernelTester()
33492 .mr(4)
33493 .nr(4)
33494 .kr(1)
33495 .sr(1)
33496 .m(m)
33497 .n(n)
33498 .k(1)
33499 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033500 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033501 }
33502 }
33503}
33504
33505TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
33506 for (uint32_t m = 1; m <= 4; m++) {
33507 GemmMicrokernelTester()
33508 .mr(4)
33509 .nr(4)
33510 .kr(1)
33511 .sr(1)
33512 .m(m)
33513 .n(4)
33514 .k(1)
33515 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033516 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033517 }
33518}
33519
33520TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
33521 for (uint32_t n = 1; n <= 4; n++) {
33522 GemmMicrokernelTester()
33523 .mr(4)
33524 .nr(4)
33525 .kr(1)
33526 .sr(1)
33527 .m(4)
33528 .n(n)
33529 .k(1)
33530 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033531 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033532 }
33533}
33534
33535TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1) {
33536 for (size_t k = 2; k < 10; k++) {
33537 GemmMicrokernelTester()
33538 .mr(4)
33539 .nr(4)
33540 .kr(1)
33541 .sr(1)
33542 .m(4)
33543 .n(4)
33544 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033545 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033546 }
33547}
33548
33549TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1_subtile) {
33550 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033551 for (uint32_t n = 1; n <= 4; n++) {
33552 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080033553 GemmMicrokernelTester()
33554 .mr(4)
33555 .nr(4)
33556 .kr(1)
33557 .sr(1)
33558 .m(m)
33559 .n(n)
33560 .k(k)
33561 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033562 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033563 }
33564 }
33565 }
33566}
33567
33568TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4) {
33569 for (uint32_t n = 5; n < 8; n++) {
33570 for (size_t k = 1; k <= 5; k += 2) {
33571 GemmMicrokernelTester()
33572 .mr(4)
33573 .nr(4)
33574 .kr(1)
33575 .sr(1)
33576 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033577 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080033578 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033579 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033580 }
33581 }
33582}
33583
33584TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
33585 for (uint32_t n = 5; n < 8; n++) {
33586 for (size_t k = 1; k <= 5; k += 2) {
33587 GemmMicrokernelTester()
33588 .mr(4)
33589 .nr(4)
33590 .kr(1)
33591 .sr(1)
33592 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033593 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080033594 .k(k)
33595 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080033596 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033597 }
33598 }
33599}
33600
33601TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_subtile) {
33602 for (uint32_t n = 5; n < 8; n++) {
33603 for (size_t k = 1; k <= 5; k += 2) {
33604 for (uint32_t m = 1; m <= 4; m++) {
33605 GemmMicrokernelTester()
33606 .mr(4)
33607 .nr(4)
33608 .kr(1)
33609 .sr(1)
33610 .m(m)
33611 .n(n)
33612 .k(k)
33613 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033614 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033615 }
33616 }
33617 }
33618}
33619
33620TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4) {
33621 for (uint32_t n = 8; n <= 12; n += 4) {
33622 for (size_t k = 1; k <= 5; k += 2) {
33623 GemmMicrokernelTester()
33624 .mr(4)
33625 .nr(4)
33626 .kr(1)
33627 .sr(1)
33628 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033629 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080033630 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033631 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033632 }
33633 }
33634}
33635
33636TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
33637 for (uint32_t n = 8; n <= 12; n += 4) {
33638 for (size_t k = 1; k <= 5; k += 2) {
33639 GemmMicrokernelTester()
33640 .mr(4)
33641 .nr(4)
33642 .kr(1)
33643 .sr(1)
33644 .m(4)
33645 .n(n)
33646 .k(k)
33647 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080033648 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033649 }
33650 }
33651}
33652
33653TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_subtile) {
33654 for (uint32_t n = 8; n <= 12; n += 4) {
33655 for (size_t k = 1; k <= 5; k += 2) {
33656 for (uint32_t m = 1; m <= 4; m++) {
33657 GemmMicrokernelTester()
33658 .mr(4)
33659 .nr(4)
33660 .kr(1)
33661 .sr(1)
33662 .m(m)
33663 .n(n)
33664 .k(k)
33665 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033666 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033667 }
33668 }
33669 }
33670}
33671
33672TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, small_kernel) {
33673 for (size_t k = 1; k <= 5; k += 2) {
33674 GemmMicrokernelTester()
33675 .mr(4)
33676 .nr(4)
33677 .kr(1)
33678 .sr(1)
33679 .m(4)
33680 .n(4)
33681 .k(k)
33682 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080033683 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033684 }
33685}
33686
33687TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, small_kernel_subtile) {
33688 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033689 for (uint32_t n = 1; n <= 4; n++) {
33690 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080033691 GemmMicrokernelTester()
33692 .mr(4)
33693 .nr(4)
33694 .kr(1)
33695 .sr(1)
33696 .m(m)
33697 .n(n)
33698 .k(k)
33699 .ks(3)
33700 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033701 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033702 }
33703 }
33704 }
33705}
33706
33707TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
33708 for (uint32_t n = 5; n < 8; n++) {
33709 for (size_t k = 1; k <= 5; k += 2) {
33710 GemmMicrokernelTester()
33711 .mr(4)
33712 .nr(4)
33713 .kr(1)
33714 .sr(1)
33715 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033716 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080033717 .k(k)
33718 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080033719 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033720 }
33721 }
33722}
33723
33724TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
33725 for (uint32_t n = 8; n <= 12; n += 4) {
33726 for (size_t k = 1; k <= 5; k += 2) {
33727 GemmMicrokernelTester()
33728 .mr(4)
33729 .nr(4)
33730 .kr(1)
33731 .sr(1)
33732 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033733 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080033734 .k(k)
33735 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080033736 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033737 }
33738 }
33739}
33740
33741TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm_subtile) {
33742 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033743 for (uint32_t n = 1; n <= 4; n++) {
33744 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080033745 GemmMicrokernelTester()
33746 .mr(4)
33747 .nr(4)
33748 .kr(1)
33749 .sr(1)
33750 .m(m)
33751 .n(n)
33752 .k(k)
33753 .cm_stride(7)
33754 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033755 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033756 }
33757 }
33758 }
33759}
33760
33761TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, a_offset) {
33762 for (size_t k = 1; k <= 5; k += 2) {
33763 GemmMicrokernelTester()
33764 .mr(4)
33765 .nr(4)
33766 .kr(1)
33767 .sr(1)
33768 .m(4)
33769 .n(4)
33770 .k(k)
33771 .ks(3)
33772 .a_offset(23)
Marat Dukhan50323b82022-01-11 00:12:01 -080033773 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033774 }
33775}
33776
33777TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033778 for (size_t k = 1; k <= 5; k += 2) {
33779 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080033780 GemmMicrokernelTester()
33781 .mr(4)
33782 .nr(4)
33783 .kr(1)
33784 .sr(1)
33785 .m(4)
33786 .n(4)
33787 .k(k)
33788 .ks(3)
33789 .a_offset(23)
33790 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080033791 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033792 }
33793 }
33794}
33795
33796TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmin) {
33797 GemmMicrokernelTester()
33798 .mr(4)
33799 .nr(4)
33800 .kr(1)
33801 .sr(1)
33802 .m(4)
33803 .n(4)
33804 .k(1)
33805 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080033806 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033807}
33808
33809TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmax) {
33810 GemmMicrokernelTester()
33811 .mr(4)
33812 .nr(4)
33813 .kr(1)
33814 .sr(1)
33815 .m(4)
33816 .n(4)
33817 .k(1)
33818 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080033819 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033820}
33821
33822TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm) {
33823 GemmMicrokernelTester()
33824 .mr(4)
33825 .nr(4)
33826 .kr(1)
33827 .sr(1)
33828 .m(4)
33829 .n(4)
33830 .k(1)
33831 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080033832 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033833}
33834
33835TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, no_a_zero_point) {
33836 for (size_t k = 1; k <= 5; k += 2) {
33837 GemmMicrokernelTester()
33838 .mr(4)
33839 .nr(4)
33840 .kr(1)
33841 .sr(1)
33842 .m(4)
33843 .n(4)
33844 .k(k)
33845 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080033846 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033847 }
33848}
33849
33850TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, no_b_zero_point) {
33851 for (size_t k = 1; k <= 5; k += 2) {
33852 GemmMicrokernelTester()
33853 .mr(4)
33854 .nr(4)
33855 .kr(1)
33856 .sr(1)
33857 .m(4)
33858 .n(4)
33859 .k(k)
33860 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080033861 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033862 }
33863}
33864
33865TEST(QU8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, no_zero_point) {
33866 for (size_t k = 1; k <= 5; k += 2) {
33867 GemmMicrokernelTester()
33868 .mr(4)
33869 .nr(4)
33870 .kr(1)
33871 .sr(1)
33872 .m(4)
33873 .n(4)
33874 .k(k)
33875 .a_zero_point(0)
33876 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080033877 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic, xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033878 }
33879}
33880
33881TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1) {
33882 GemmMicrokernelTester()
33883 .mr(1)
33884 .nr(2)
33885 .kr(1)
33886 .sr(1)
33887 .m(1)
33888 .n(2)
33889 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033890 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033891}
33892
33893TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cn) {
33894 GemmMicrokernelTester()
33895 .mr(1)
33896 .nr(2)
33897 .kr(1)
33898 .sr(1)
33899 .m(1)
33900 .n(2)
33901 .k(1)
33902 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080033903 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033904}
33905
33906TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033907 for (uint32_t n = 1; n <= 2; n++) {
33908 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080033909 GemmMicrokernelTester()
33910 .mr(1)
33911 .nr(2)
33912 .kr(1)
33913 .sr(1)
33914 .m(m)
33915 .n(n)
33916 .k(1)
33917 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033918 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033919 }
33920 }
33921}
33922
33923TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
33924 for (uint32_t m = 1; m <= 1; m++) {
33925 GemmMicrokernelTester()
33926 .mr(1)
33927 .nr(2)
33928 .kr(1)
33929 .sr(1)
33930 .m(m)
33931 .n(2)
33932 .k(1)
33933 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033934 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033935 }
33936}
33937
33938TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
33939 for (uint32_t n = 1; n <= 2; n++) {
33940 GemmMicrokernelTester()
33941 .mr(1)
33942 .nr(2)
33943 .kr(1)
33944 .sr(1)
33945 .m(1)
33946 .n(n)
33947 .k(1)
33948 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033949 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033950 }
33951}
33952
33953TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1) {
33954 for (size_t k = 2; k < 10; k++) {
33955 GemmMicrokernelTester()
33956 .mr(1)
33957 .nr(2)
33958 .kr(1)
33959 .sr(1)
33960 .m(1)
33961 .n(2)
33962 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033963 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033964 }
33965}
33966
33967TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_subtile) {
33968 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080033969 for (uint32_t n = 1; n <= 2; n++) {
33970 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080033971 GemmMicrokernelTester()
33972 .mr(1)
33973 .nr(2)
33974 .kr(1)
33975 .sr(1)
33976 .m(m)
33977 .n(n)
33978 .k(k)
33979 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080033980 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033981 }
33982 }
33983 }
33984}
33985
33986TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2) {
33987 for (uint32_t n = 3; n < 4; n++) {
33988 for (size_t k = 1; k <= 5; k += 2) {
33989 GemmMicrokernelTester()
33990 .mr(1)
33991 .nr(2)
33992 .kr(1)
33993 .sr(1)
33994 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080033995 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080033996 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080033997 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080033998 }
33999 }
34000}
34001
34002TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
34003 for (uint32_t n = 3; n < 4; n++) {
34004 for (size_t k = 1; k <= 5; k += 2) {
34005 GemmMicrokernelTester()
34006 .mr(1)
34007 .nr(2)
34008 .kr(1)
34009 .sr(1)
34010 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034011 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080034012 .k(k)
34013 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080034014 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034015 }
34016 }
34017}
34018
34019TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_subtile) {
34020 for (uint32_t n = 3; n < 4; n++) {
34021 for (size_t k = 1; k <= 5; k += 2) {
34022 for (uint32_t m = 1; m <= 1; m++) {
34023 GemmMicrokernelTester()
34024 .mr(1)
34025 .nr(2)
34026 .kr(1)
34027 .sr(1)
34028 .m(m)
34029 .n(n)
34030 .k(k)
34031 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034032 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034033 }
34034 }
34035 }
34036}
34037
34038TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2) {
34039 for (uint32_t n = 4; n <= 6; n += 2) {
34040 for (size_t k = 1; k <= 5; k += 2) {
34041 GemmMicrokernelTester()
34042 .mr(1)
34043 .nr(2)
34044 .kr(1)
34045 .sr(1)
34046 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034047 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080034048 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034049 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034050 }
34051 }
34052}
34053
34054TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_cn) {
34055 for (uint32_t n = 4; n <= 6; n += 2) {
34056 for (size_t k = 1; k <= 5; k += 2) {
34057 GemmMicrokernelTester()
34058 .mr(1)
34059 .nr(2)
34060 .kr(1)
34061 .sr(1)
34062 .m(1)
34063 .n(n)
34064 .k(k)
34065 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080034066 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034067 }
34068 }
34069}
34070
34071TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_subtile) {
34072 for (uint32_t n = 4; n <= 6; n += 2) {
34073 for (size_t k = 1; k <= 5; k += 2) {
34074 for (uint32_t m = 1; m <= 1; m++) {
34075 GemmMicrokernelTester()
34076 .mr(1)
34077 .nr(2)
34078 .kr(1)
34079 .sr(1)
34080 .m(m)
34081 .n(n)
34082 .k(k)
34083 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034084 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034085 }
34086 }
34087 }
34088}
34089
34090TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, small_kernel) {
34091 for (size_t k = 1; k <= 5; k += 2) {
34092 GemmMicrokernelTester()
34093 .mr(1)
34094 .nr(2)
34095 .kr(1)
34096 .sr(1)
34097 .m(1)
34098 .n(2)
34099 .k(k)
34100 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080034101 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034102 }
34103}
34104
34105TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, small_kernel_subtile) {
34106 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034107 for (uint32_t n = 1; n <= 2; n++) {
34108 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080034109 GemmMicrokernelTester()
34110 .mr(1)
34111 .nr(2)
34112 .kr(1)
34113 .sr(1)
34114 .m(m)
34115 .n(n)
34116 .k(k)
34117 .ks(3)
34118 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034119 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034120 }
34121 }
34122 }
34123}
34124
34125TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
34126 for (uint32_t n = 3; n < 4; n++) {
34127 for (size_t k = 1; k <= 5; k += 2) {
34128 GemmMicrokernelTester()
34129 .mr(1)
34130 .nr(2)
34131 .kr(1)
34132 .sr(1)
34133 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034134 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080034135 .k(k)
34136 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080034137 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034138 }
34139 }
34140}
34141
34142TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_small_kernel) {
34143 for (uint32_t n = 4; n <= 6; n += 2) {
34144 for (size_t k = 1; k <= 5; k += 2) {
34145 GemmMicrokernelTester()
34146 .mr(1)
34147 .nr(2)
34148 .kr(1)
34149 .sr(1)
34150 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034151 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080034152 .k(k)
34153 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080034154 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034155 }
34156 }
34157}
34158
34159TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm_subtile) {
34160 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034161 for (uint32_t n = 1; n <= 2; n++) {
34162 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080034163 GemmMicrokernelTester()
34164 .mr(1)
34165 .nr(2)
34166 .kr(1)
34167 .sr(1)
34168 .m(m)
34169 .n(n)
34170 .k(k)
34171 .cm_stride(5)
34172 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034173 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034174 }
34175 }
34176 }
34177}
34178
34179TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, a_offset) {
34180 for (size_t k = 1; k <= 5; k += 2) {
34181 GemmMicrokernelTester()
34182 .mr(1)
34183 .nr(2)
34184 .kr(1)
34185 .sr(1)
34186 .m(1)
34187 .n(2)
34188 .k(k)
34189 .ks(3)
34190 .a_offset(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080034191 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034192 }
34193}
34194
34195TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034196 for (size_t k = 1; k <= 5; k += 2) {
34197 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080034198 GemmMicrokernelTester()
34199 .mr(1)
34200 .nr(2)
34201 .kr(1)
34202 .sr(1)
34203 .m(1)
34204 .n(2)
34205 .k(k)
34206 .ks(3)
34207 .a_offset(7)
34208 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080034209 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034210 }
34211 }
34212}
34213
34214TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmin) {
34215 GemmMicrokernelTester()
34216 .mr(1)
34217 .nr(2)
34218 .kr(1)
34219 .sr(1)
34220 .m(1)
34221 .n(2)
34222 .k(1)
34223 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080034224 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034225}
34226
34227TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmax) {
34228 GemmMicrokernelTester()
34229 .mr(1)
34230 .nr(2)
34231 .kr(1)
34232 .sr(1)
34233 .m(1)
34234 .n(2)
34235 .k(1)
34236 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080034237 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034238}
34239
34240TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm) {
34241 GemmMicrokernelTester()
34242 .mr(1)
34243 .nr(2)
34244 .kr(1)
34245 .sr(1)
34246 .m(1)
34247 .n(2)
34248 .k(1)
34249 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080034250 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034251}
34252
34253TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, no_a_zero_point) {
34254 for (size_t k = 1; k <= 5; k += 2) {
34255 GemmMicrokernelTester()
34256 .mr(1)
34257 .nr(2)
34258 .kr(1)
34259 .sr(1)
34260 .m(1)
34261 .n(2)
34262 .k(k)
34263 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080034264 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034265 }
34266}
34267
34268TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, no_b_zero_point) {
34269 for (size_t k = 1; k <= 5; k += 2) {
34270 GemmMicrokernelTester()
34271 .mr(1)
34272 .nr(2)
34273 .kr(1)
34274 .sr(1)
34275 .m(1)
34276 .n(2)
34277 .k(k)
34278 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080034279 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034280 }
34281}
34282
34283TEST(QU8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, no_zero_point) {
34284 for (size_t k = 1; k <= 5; k += 2) {
34285 GemmMicrokernelTester()
34286 .mr(1)
34287 .nr(2)
34288 .kr(1)
34289 .sr(1)
34290 .m(1)
34291 .n(2)
34292 .k(k)
34293 .a_zero_point(0)
34294 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080034295 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034296 }
34297}
34298
34299TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1) {
34300 GemmMicrokernelTester()
34301 .mr(2)
34302 .nr(2)
34303 .kr(1)
34304 .sr(1)
34305 .m(2)
34306 .n(2)
34307 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034308 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034309}
34310
34311TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cn) {
34312 GemmMicrokernelTester()
34313 .mr(2)
34314 .nr(2)
34315 .kr(1)
34316 .sr(1)
34317 .m(2)
34318 .n(2)
34319 .k(1)
34320 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080034321 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034322}
34323
34324TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034325 for (uint32_t n = 1; n <= 2; n++) {
34326 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080034327 GemmMicrokernelTester()
34328 .mr(2)
34329 .nr(2)
34330 .kr(1)
34331 .sr(1)
34332 .m(m)
34333 .n(n)
34334 .k(1)
34335 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034336 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034337 }
34338 }
34339}
34340
34341TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
34342 for (uint32_t m = 1; m <= 2; m++) {
34343 GemmMicrokernelTester()
34344 .mr(2)
34345 .nr(2)
34346 .kr(1)
34347 .sr(1)
34348 .m(m)
34349 .n(2)
34350 .k(1)
34351 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034352 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034353 }
34354}
34355
34356TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
34357 for (uint32_t n = 1; n <= 2; n++) {
34358 GemmMicrokernelTester()
34359 .mr(2)
34360 .nr(2)
34361 .kr(1)
34362 .sr(1)
34363 .m(2)
34364 .n(n)
34365 .k(1)
34366 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034367 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034368 }
34369}
34370
34371TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1) {
34372 for (size_t k = 2; k < 10; k++) {
34373 GemmMicrokernelTester()
34374 .mr(2)
34375 .nr(2)
34376 .kr(1)
34377 .sr(1)
34378 .m(2)
34379 .n(2)
34380 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034381 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034382 }
34383}
34384
34385TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_subtile) {
34386 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034387 for (uint32_t n = 1; n <= 2; n++) {
34388 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080034389 GemmMicrokernelTester()
34390 .mr(2)
34391 .nr(2)
34392 .kr(1)
34393 .sr(1)
34394 .m(m)
34395 .n(n)
34396 .k(k)
34397 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034398 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034399 }
34400 }
34401 }
34402}
34403
34404TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2) {
34405 for (uint32_t n = 3; n < 4; n++) {
34406 for (size_t k = 1; k <= 5; k += 2) {
34407 GemmMicrokernelTester()
34408 .mr(2)
34409 .nr(2)
34410 .kr(1)
34411 .sr(1)
34412 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034413 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080034414 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034415 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034416 }
34417 }
34418}
34419
34420TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
34421 for (uint32_t n = 3; n < 4; n++) {
34422 for (size_t k = 1; k <= 5; k += 2) {
34423 GemmMicrokernelTester()
34424 .mr(2)
34425 .nr(2)
34426 .kr(1)
34427 .sr(1)
34428 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034429 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080034430 .k(k)
34431 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080034432 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034433 }
34434 }
34435}
34436
34437TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_subtile) {
34438 for (uint32_t n = 3; n < 4; n++) {
34439 for (size_t k = 1; k <= 5; k += 2) {
34440 for (uint32_t m = 1; m <= 2; m++) {
34441 GemmMicrokernelTester()
34442 .mr(2)
34443 .nr(2)
34444 .kr(1)
34445 .sr(1)
34446 .m(m)
34447 .n(n)
34448 .k(k)
34449 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034450 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034451 }
34452 }
34453 }
34454}
34455
34456TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2) {
34457 for (uint32_t n = 4; n <= 6; n += 2) {
34458 for (size_t k = 1; k <= 5; k += 2) {
34459 GemmMicrokernelTester()
34460 .mr(2)
34461 .nr(2)
34462 .kr(1)
34463 .sr(1)
34464 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034465 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080034466 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034467 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034468 }
34469 }
34470}
34471
34472TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_cn) {
34473 for (uint32_t n = 4; n <= 6; n += 2) {
34474 for (size_t k = 1; k <= 5; k += 2) {
34475 GemmMicrokernelTester()
34476 .mr(2)
34477 .nr(2)
34478 .kr(1)
34479 .sr(1)
34480 .m(2)
34481 .n(n)
34482 .k(k)
34483 .cn_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080034484 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034485 }
34486 }
34487}
34488
34489TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_subtile) {
34490 for (uint32_t n = 4; n <= 6; n += 2) {
34491 for (size_t k = 1; k <= 5; k += 2) {
34492 for (uint32_t m = 1; m <= 2; m++) {
34493 GemmMicrokernelTester()
34494 .mr(2)
34495 .nr(2)
34496 .kr(1)
34497 .sr(1)
34498 .m(m)
34499 .n(n)
34500 .k(k)
34501 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034502 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034503 }
34504 }
34505 }
34506}
34507
34508TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, small_kernel) {
34509 for (size_t k = 1; k <= 5; k += 2) {
34510 GemmMicrokernelTester()
34511 .mr(2)
34512 .nr(2)
34513 .kr(1)
34514 .sr(1)
34515 .m(2)
34516 .n(2)
34517 .k(k)
34518 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080034519 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034520 }
34521}
34522
34523TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, small_kernel_subtile) {
34524 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034525 for (uint32_t n = 1; n <= 2; n++) {
34526 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080034527 GemmMicrokernelTester()
34528 .mr(2)
34529 .nr(2)
34530 .kr(1)
34531 .sr(1)
34532 .m(m)
34533 .n(n)
34534 .k(k)
34535 .ks(3)
34536 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034537 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034538 }
34539 }
34540 }
34541}
34542
34543TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
34544 for (uint32_t n = 3; n < 4; n++) {
34545 for (size_t k = 1; k <= 5; k += 2) {
34546 GemmMicrokernelTester()
34547 .mr(2)
34548 .nr(2)
34549 .kr(1)
34550 .sr(1)
34551 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034552 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080034553 .k(k)
34554 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080034555 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034556 }
34557 }
34558}
34559
34560TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_small_kernel) {
34561 for (uint32_t n = 4; n <= 6; n += 2) {
34562 for (size_t k = 1; k <= 5; k += 2) {
34563 GemmMicrokernelTester()
34564 .mr(2)
34565 .nr(2)
34566 .kr(1)
34567 .sr(1)
34568 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034569 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080034570 .k(k)
34571 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080034572 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034573 }
34574 }
34575}
34576
34577TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm_subtile) {
34578 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034579 for (uint32_t n = 1; n <= 2; n++) {
34580 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080034581 GemmMicrokernelTester()
34582 .mr(2)
34583 .nr(2)
34584 .kr(1)
34585 .sr(1)
34586 .m(m)
34587 .n(n)
34588 .k(k)
34589 .cm_stride(5)
34590 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034591 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034592 }
34593 }
34594 }
34595}
34596
34597TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, a_offset) {
34598 for (size_t k = 1; k <= 5; k += 2) {
34599 GemmMicrokernelTester()
34600 .mr(2)
34601 .nr(2)
34602 .kr(1)
34603 .sr(1)
34604 .m(2)
34605 .n(2)
34606 .k(k)
34607 .ks(3)
34608 .a_offset(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080034609 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034610 }
34611}
34612
34613TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034614 for (size_t k = 1; k <= 5; k += 2) {
34615 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080034616 GemmMicrokernelTester()
34617 .mr(2)
34618 .nr(2)
34619 .kr(1)
34620 .sr(1)
34621 .m(2)
34622 .n(2)
34623 .k(k)
34624 .ks(3)
34625 .a_offset(13)
34626 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080034627 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034628 }
34629 }
34630}
34631
34632TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmin) {
34633 GemmMicrokernelTester()
34634 .mr(2)
34635 .nr(2)
34636 .kr(1)
34637 .sr(1)
34638 .m(2)
34639 .n(2)
34640 .k(1)
34641 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080034642 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034643}
34644
34645TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmax) {
34646 GemmMicrokernelTester()
34647 .mr(2)
34648 .nr(2)
34649 .kr(1)
34650 .sr(1)
34651 .m(2)
34652 .n(2)
34653 .k(1)
34654 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080034655 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034656}
34657
34658TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm) {
34659 GemmMicrokernelTester()
34660 .mr(2)
34661 .nr(2)
34662 .kr(1)
34663 .sr(1)
34664 .m(2)
34665 .n(2)
34666 .k(1)
34667 .cm_stride(5)
Marat Dukhan50323b82022-01-11 00:12:01 -080034668 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034669}
34670
34671TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, no_a_zero_point) {
34672 for (size_t k = 1; k <= 5; k += 2) {
34673 GemmMicrokernelTester()
34674 .mr(2)
34675 .nr(2)
34676 .kr(1)
34677 .sr(1)
34678 .m(2)
34679 .n(2)
34680 .k(k)
34681 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080034682 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034683 }
34684}
34685
34686TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, no_b_zero_point) {
34687 for (size_t k = 1; k <= 5; k += 2) {
34688 GemmMicrokernelTester()
34689 .mr(2)
34690 .nr(2)
34691 .kr(1)
34692 .sr(1)
34693 .m(2)
34694 .n(2)
34695 .k(k)
34696 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080034697 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034698 }
34699}
34700
34701TEST(QU8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, no_zero_point) {
34702 for (size_t k = 1; k <= 5; k += 2) {
34703 GemmMicrokernelTester()
34704 .mr(2)
34705 .nr(2)
34706 .kr(1)
34707 .sr(1)
34708 .m(2)
34709 .n(2)
34710 .k(k)
34711 .a_zero_point(0)
34712 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080034713 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034714 }
34715}
34716
Marat Dukhan272d4d92022-01-04 15:07:14 -080034717TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1) {
34718 GemmMicrokernelTester()
34719 .mr(1)
34720 .nr(4)
34721 .kr(1)
34722 .sr(1)
34723 .m(1)
34724 .n(4)
34725 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034726 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034727}
34728
34729TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cn) {
34730 GemmMicrokernelTester()
34731 .mr(1)
34732 .nr(4)
34733 .kr(1)
34734 .sr(1)
34735 .m(1)
34736 .n(4)
34737 .k(1)
34738 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080034739 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034740}
34741
34742TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034743 for (uint32_t n = 1; n <= 4; n++) {
34744 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080034745 GemmMicrokernelTester()
34746 .mr(1)
34747 .nr(4)
34748 .kr(1)
34749 .sr(1)
34750 .m(m)
34751 .n(n)
34752 .k(1)
34753 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034754 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034755 }
34756 }
34757}
34758
34759TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
34760 for (uint32_t m = 1; m <= 1; m++) {
34761 GemmMicrokernelTester()
34762 .mr(1)
34763 .nr(4)
34764 .kr(1)
34765 .sr(1)
34766 .m(m)
34767 .n(4)
34768 .k(1)
34769 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034770 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034771 }
34772}
34773
34774TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
34775 for (uint32_t n = 1; n <= 4; n++) {
34776 GemmMicrokernelTester()
34777 .mr(1)
34778 .nr(4)
34779 .kr(1)
34780 .sr(1)
34781 .m(1)
34782 .n(n)
34783 .k(1)
34784 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034785 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034786 }
34787}
34788
34789TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1) {
34790 for (size_t k = 2; k < 10; k++) {
34791 GemmMicrokernelTester()
34792 .mr(1)
34793 .nr(4)
34794 .kr(1)
34795 .sr(1)
34796 .m(1)
34797 .n(4)
34798 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034799 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034800 }
34801}
34802
34803TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_subtile) {
34804 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034805 for (uint32_t n = 1; n <= 4; n++) {
34806 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080034807 GemmMicrokernelTester()
34808 .mr(1)
34809 .nr(4)
34810 .kr(1)
34811 .sr(1)
34812 .m(m)
34813 .n(n)
34814 .k(k)
34815 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034816 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034817 }
34818 }
34819 }
34820}
34821
34822TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4) {
34823 for (uint32_t n = 5; n < 8; n++) {
34824 for (size_t k = 1; k <= 5; k += 2) {
34825 GemmMicrokernelTester()
34826 .mr(1)
34827 .nr(4)
34828 .kr(1)
34829 .sr(1)
34830 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034831 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080034832 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034833 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034834 }
34835 }
34836}
34837
34838TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
34839 for (uint32_t n = 5; n < 8; n++) {
34840 for (size_t k = 1; k <= 5; k += 2) {
34841 GemmMicrokernelTester()
34842 .mr(1)
34843 .nr(4)
34844 .kr(1)
34845 .sr(1)
34846 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034847 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080034848 .k(k)
34849 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080034850 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034851 }
34852 }
34853}
34854
34855TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_subtile) {
34856 for (uint32_t n = 5; n < 8; n++) {
34857 for (size_t k = 1; k <= 5; k += 2) {
34858 for (uint32_t m = 1; m <= 1; m++) {
34859 GemmMicrokernelTester()
34860 .mr(1)
34861 .nr(4)
34862 .kr(1)
34863 .sr(1)
34864 .m(m)
34865 .n(n)
34866 .k(k)
34867 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034868 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034869 }
34870 }
34871 }
34872}
34873
34874TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4) {
34875 for (uint32_t n = 8; n <= 12; n += 4) {
34876 for (size_t k = 1; k <= 5; k += 2) {
34877 GemmMicrokernelTester()
34878 .mr(1)
34879 .nr(4)
34880 .kr(1)
34881 .sr(1)
34882 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034883 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080034884 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080034885 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034886 }
34887 }
34888}
34889
34890TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_cn) {
34891 for (uint32_t n = 8; n <= 12; n += 4) {
34892 for (size_t k = 1; k <= 5; k += 2) {
34893 GemmMicrokernelTester()
34894 .mr(1)
34895 .nr(4)
34896 .kr(1)
34897 .sr(1)
34898 .m(1)
34899 .n(n)
34900 .k(k)
34901 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080034902 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034903 }
34904 }
34905}
34906
34907TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_subtile) {
34908 for (uint32_t n = 8; n <= 12; n += 4) {
34909 for (size_t k = 1; k <= 5; k += 2) {
34910 for (uint32_t m = 1; m <= 1; m++) {
34911 GemmMicrokernelTester()
34912 .mr(1)
34913 .nr(4)
34914 .kr(1)
34915 .sr(1)
34916 .m(m)
34917 .n(n)
34918 .k(k)
34919 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034920 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034921 }
34922 }
34923 }
34924}
34925
34926TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, small_kernel) {
34927 for (size_t k = 1; k <= 5; k += 2) {
34928 GemmMicrokernelTester()
34929 .mr(1)
34930 .nr(4)
34931 .kr(1)
34932 .sr(1)
34933 .m(1)
34934 .n(4)
34935 .k(k)
34936 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080034937 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034938 }
34939}
34940
34941TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, small_kernel_subtile) {
34942 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034943 for (uint32_t n = 1; n <= 4; n++) {
34944 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080034945 GemmMicrokernelTester()
34946 .mr(1)
34947 .nr(4)
34948 .kr(1)
34949 .sr(1)
34950 .m(m)
34951 .n(n)
34952 .k(k)
34953 .ks(3)
34954 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080034955 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034956 }
34957 }
34958 }
34959}
34960
34961TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
34962 for (uint32_t n = 5; n < 8; n++) {
34963 for (size_t k = 1; k <= 5; k += 2) {
34964 GemmMicrokernelTester()
34965 .mr(1)
34966 .nr(4)
34967 .kr(1)
34968 .sr(1)
34969 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034970 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080034971 .k(k)
34972 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080034973 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034974 }
34975 }
34976}
34977
34978TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_small_kernel) {
34979 for (uint32_t n = 8; n <= 12; n += 4) {
34980 for (size_t k = 1; k <= 5; k += 2) {
34981 GemmMicrokernelTester()
34982 .mr(1)
34983 .nr(4)
34984 .kr(1)
34985 .sr(1)
34986 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080034987 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080034988 .k(k)
34989 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080034990 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080034991 }
34992 }
34993}
34994
34995TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm_subtile) {
34996 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080034997 for (uint32_t n = 1; n <= 4; n++) {
34998 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080034999 GemmMicrokernelTester()
35000 .mr(1)
35001 .nr(4)
35002 .kr(1)
35003 .sr(1)
35004 .m(m)
35005 .n(n)
35006 .k(k)
35007 .cm_stride(7)
35008 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035009 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035010 }
35011 }
35012 }
35013}
35014
35015TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, a_offset) {
35016 for (size_t k = 1; k <= 5; k += 2) {
35017 GemmMicrokernelTester()
35018 .mr(1)
35019 .nr(4)
35020 .kr(1)
35021 .sr(1)
35022 .m(1)
35023 .n(4)
35024 .k(k)
35025 .ks(3)
35026 .a_offset(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080035027 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035028 }
35029}
35030
35031TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035032 for (size_t k = 1; k <= 5; k += 2) {
35033 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080035034 GemmMicrokernelTester()
35035 .mr(1)
35036 .nr(4)
35037 .kr(1)
35038 .sr(1)
35039 .m(1)
35040 .n(4)
35041 .k(k)
35042 .ks(3)
35043 .a_offset(7)
35044 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080035045 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035046 }
35047 }
35048}
35049
35050TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmin) {
35051 GemmMicrokernelTester()
35052 .mr(1)
35053 .nr(4)
35054 .kr(1)
35055 .sr(1)
35056 .m(1)
35057 .n(4)
35058 .k(1)
35059 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080035060 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035061}
35062
35063TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmax) {
35064 GemmMicrokernelTester()
35065 .mr(1)
35066 .nr(4)
35067 .kr(1)
35068 .sr(1)
35069 .m(1)
35070 .n(4)
35071 .k(1)
35072 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080035073 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035074}
35075
35076TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm) {
35077 GemmMicrokernelTester()
35078 .mr(1)
35079 .nr(4)
35080 .kr(1)
35081 .sr(1)
35082 .m(1)
35083 .n(4)
35084 .k(1)
35085 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080035086 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035087}
35088
35089TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, no_a_zero_point) {
35090 for (size_t k = 1; k <= 5; k += 2) {
35091 GemmMicrokernelTester()
35092 .mr(1)
35093 .nr(4)
35094 .kr(1)
35095 .sr(1)
35096 .m(1)
35097 .n(4)
35098 .k(k)
35099 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080035100 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035101 }
35102}
35103
35104TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, no_b_zero_point) {
35105 for (size_t k = 1; k <= 5; k += 2) {
35106 GemmMicrokernelTester()
35107 .mr(1)
35108 .nr(4)
35109 .kr(1)
35110 .sr(1)
35111 .m(1)
35112 .n(4)
35113 .k(k)
35114 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080035115 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035116 }
35117}
35118
35119TEST(QU8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, no_zero_point) {
35120 for (size_t k = 1; k <= 5; k += 2) {
35121 GemmMicrokernelTester()
35122 .mr(1)
35123 .nr(4)
35124 .kr(1)
35125 .sr(1)
35126 .m(1)
35127 .n(4)
35128 .k(k)
35129 .a_zero_point(0)
35130 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080035131 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035132 }
35133}
35134
35135TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1) {
35136 GemmMicrokernelTester()
35137 .mr(2)
35138 .nr(4)
35139 .kr(1)
35140 .sr(1)
35141 .m(2)
35142 .n(4)
35143 .k(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035144 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035145}
35146
35147TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cn) {
35148 GemmMicrokernelTester()
35149 .mr(2)
35150 .nr(4)
35151 .kr(1)
35152 .sr(1)
35153 .m(2)
35154 .n(4)
35155 .k(1)
35156 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080035157 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035158}
35159
35160TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035161 for (uint32_t n = 1; n <= 4; n++) {
35162 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080035163 GemmMicrokernelTester()
35164 .mr(2)
35165 .nr(4)
35166 .kr(1)
35167 .sr(1)
35168 .m(m)
35169 .n(n)
35170 .k(1)
35171 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035172 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035173 }
35174 }
35175}
35176
35177TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
35178 for (uint32_t m = 1; m <= 2; m++) {
35179 GemmMicrokernelTester()
35180 .mr(2)
35181 .nr(4)
35182 .kr(1)
35183 .sr(1)
35184 .m(m)
35185 .n(4)
35186 .k(1)
35187 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035188 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035189 }
35190}
35191
35192TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
35193 for (uint32_t n = 1; n <= 4; n++) {
35194 GemmMicrokernelTester()
35195 .mr(2)
35196 .nr(4)
35197 .kr(1)
35198 .sr(1)
35199 .m(2)
35200 .n(n)
35201 .k(1)
35202 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035203 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035204 }
35205}
35206
35207TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1) {
35208 for (size_t k = 2; k < 10; k++) {
35209 GemmMicrokernelTester()
35210 .mr(2)
35211 .nr(4)
35212 .kr(1)
35213 .sr(1)
35214 .m(2)
35215 .n(4)
35216 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080035217 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035218 }
35219}
35220
35221TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_subtile) {
35222 for (size_t k = 2; k < 10; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035223 for (uint32_t n = 1; n <= 4; n++) {
35224 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080035225 GemmMicrokernelTester()
35226 .mr(2)
35227 .nr(4)
35228 .kr(1)
35229 .sr(1)
35230 .m(m)
35231 .n(n)
35232 .k(k)
35233 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035234 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035235 }
35236 }
35237 }
35238}
35239
35240TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4) {
35241 for (uint32_t n = 5; n < 8; n++) {
35242 for (size_t k = 1; k <= 5; k += 2) {
35243 GemmMicrokernelTester()
35244 .mr(2)
35245 .nr(4)
35246 .kr(1)
35247 .sr(1)
35248 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080035249 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080035250 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080035251 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035252 }
35253 }
35254}
35255
35256TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
35257 for (uint32_t n = 5; n < 8; n++) {
35258 for (size_t k = 1; k <= 5; k += 2) {
35259 GemmMicrokernelTester()
35260 .mr(2)
35261 .nr(4)
35262 .kr(1)
35263 .sr(1)
35264 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080035265 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080035266 .k(k)
35267 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080035268 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035269 }
35270 }
35271}
35272
35273TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_subtile) {
35274 for (uint32_t n = 5; n < 8; n++) {
35275 for (size_t k = 1; k <= 5; k += 2) {
35276 for (uint32_t m = 1; m <= 2; m++) {
35277 GemmMicrokernelTester()
35278 .mr(2)
35279 .nr(4)
35280 .kr(1)
35281 .sr(1)
35282 .m(m)
35283 .n(n)
35284 .k(k)
35285 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035286 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035287 }
35288 }
35289 }
35290}
35291
35292TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4) {
35293 for (uint32_t n = 8; n <= 12; n += 4) {
35294 for (size_t k = 1; k <= 5; k += 2) {
35295 GemmMicrokernelTester()
35296 .mr(2)
35297 .nr(4)
35298 .kr(1)
35299 .sr(1)
35300 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080035301 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080035302 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080035303 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035304 }
35305 }
35306}
35307
35308TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_cn) {
35309 for (uint32_t n = 8; n <= 12; n += 4) {
35310 for (size_t k = 1; k <= 5; k += 2) {
35311 GemmMicrokernelTester()
35312 .mr(2)
35313 .nr(4)
35314 .kr(1)
35315 .sr(1)
35316 .m(2)
35317 .n(n)
35318 .k(k)
35319 .cn_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080035320 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035321 }
35322 }
35323}
35324
35325TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_subtile) {
35326 for (uint32_t n = 8; n <= 12; n += 4) {
35327 for (size_t k = 1; k <= 5; k += 2) {
35328 for (uint32_t m = 1; m <= 2; m++) {
35329 GemmMicrokernelTester()
35330 .mr(2)
35331 .nr(4)
35332 .kr(1)
35333 .sr(1)
35334 .m(m)
35335 .n(n)
35336 .k(k)
35337 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035338 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035339 }
35340 }
35341 }
35342}
35343
35344TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, small_kernel) {
35345 for (size_t k = 1; k <= 5; k += 2) {
35346 GemmMicrokernelTester()
35347 .mr(2)
35348 .nr(4)
35349 .kr(1)
35350 .sr(1)
35351 .m(2)
35352 .n(4)
35353 .k(k)
35354 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080035355 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035356 }
35357}
35358
35359TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, small_kernel_subtile) {
35360 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035361 for (uint32_t n = 1; n <= 4; n++) {
35362 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080035363 GemmMicrokernelTester()
35364 .mr(2)
35365 .nr(4)
35366 .kr(1)
35367 .sr(1)
35368 .m(m)
35369 .n(n)
35370 .k(k)
35371 .ks(3)
35372 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035373 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035374 }
35375 }
35376 }
35377}
35378
35379TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
35380 for (uint32_t n = 5; n < 8; n++) {
35381 for (size_t k = 1; k <= 5; k += 2) {
35382 GemmMicrokernelTester()
35383 .mr(2)
35384 .nr(4)
35385 .kr(1)
35386 .sr(1)
35387 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080035388 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080035389 .k(k)
35390 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080035391 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035392 }
35393 }
35394}
35395
35396TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_small_kernel) {
35397 for (uint32_t n = 8; n <= 12; n += 4) {
35398 for (size_t k = 1; k <= 5; k += 2) {
35399 GemmMicrokernelTester()
35400 .mr(2)
35401 .nr(4)
35402 .kr(1)
35403 .sr(1)
35404 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080035405 .n(n)
Marat Dukhan272d4d92022-01-04 15:07:14 -080035406 .k(k)
35407 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080035408 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035409 }
35410 }
35411}
35412
35413TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm_subtile) {
35414 for (size_t k = 1; k <= 5; k += 2) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035415 for (uint32_t n = 1; n <= 4; n++) {
35416 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080035417 GemmMicrokernelTester()
35418 .mr(2)
35419 .nr(4)
35420 .kr(1)
35421 .sr(1)
35422 .m(m)
35423 .n(n)
35424 .k(k)
35425 .cm_stride(7)
35426 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080035427 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035428 }
35429 }
35430 }
35431}
35432
35433TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, a_offset) {
35434 for (size_t k = 1; k <= 5; k += 2) {
35435 GemmMicrokernelTester()
35436 .mr(2)
35437 .nr(4)
35438 .kr(1)
35439 .sr(1)
35440 .m(2)
35441 .n(4)
35442 .k(k)
35443 .ks(3)
35444 .a_offset(13)
Marat Dukhan50323b82022-01-11 00:12:01 -080035445 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035446 }
35447}
35448
35449TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, zero) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080035450 for (size_t k = 1; k <= 5; k += 2) {
35451 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan272d4d92022-01-04 15:07:14 -080035452 GemmMicrokernelTester()
35453 .mr(2)
35454 .nr(4)
35455 .kr(1)
35456 .sr(1)
35457 .m(2)
35458 .n(4)
35459 .k(k)
35460 .ks(3)
35461 .a_offset(13)
35462 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080035463 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035464 }
35465 }
35466}
35467
35468TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmin) {
35469 GemmMicrokernelTester()
35470 .mr(2)
35471 .nr(4)
35472 .kr(1)
35473 .sr(1)
35474 .m(2)
35475 .n(4)
35476 .k(1)
35477 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080035478 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035479}
35480
35481TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmax) {
35482 GemmMicrokernelTester()
35483 .mr(2)
35484 .nr(4)
35485 .kr(1)
35486 .sr(1)
35487 .m(2)
35488 .n(4)
35489 .k(1)
35490 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080035491 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035492}
35493
35494TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm) {
35495 GemmMicrokernelTester()
35496 .mr(2)
35497 .nr(4)
35498 .kr(1)
35499 .sr(1)
35500 .m(2)
35501 .n(4)
35502 .k(1)
35503 .cm_stride(7)
Marat Dukhan50323b82022-01-11 00:12:01 -080035504 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035505}
35506
35507TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, no_a_zero_point) {
35508 for (size_t k = 1; k <= 5; k += 2) {
35509 GemmMicrokernelTester()
35510 .mr(2)
35511 .nr(4)
35512 .kr(1)
35513 .sr(1)
35514 .m(2)
35515 .n(4)
35516 .k(k)
35517 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080035518 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035519 }
35520}
35521
35522TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, no_b_zero_point) {
35523 for (size_t k = 1; k <= 5; k += 2) {
35524 GemmMicrokernelTester()
35525 .mr(2)
35526 .nr(4)
35527 .kr(1)
35528 .sr(1)
35529 .m(2)
35530 .n(4)
35531 .k(k)
35532 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080035533 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035534 }
35535}
35536
35537TEST(QU8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, no_zero_point) {
35538 for (size_t k = 1; k <= 5; k += 2) {
35539 GemmMicrokernelTester()
35540 .mr(2)
35541 .nr(4)
35542 .kr(1)
35543 .sr(1)
35544 .m(2)
35545 .n(4)
35546 .k(k)
35547 .a_zero_point(0)
35548 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080035549 .Test(xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf, xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params, xnn_qu8_requantize_fp32);
Marat Dukhan272d4d92022-01-04 15:07:14 -080035550 }
Marat Dukhan927d4742021-07-15 13:42:49 -070035551}